In [None]:
import sys
# Prepend the parent directory to the module search path so it is searched
# first (presumably where the project modules imported below live — confirm
# against the repository layout).
sys.path.insert(0,'..')

In [None]:
import torch
import os

from train import train
import priors
import utils

import numpy as np

from datasets import load_openml_list, valid_dids_classification, test_dids_classification
from tabular import evaluate, get_model, get_default_spec
from tabular import bayes_net_metric, gp_metric, knn_metric, ridge_metric, catboost_metric, xgb_metric, logistic_metric

In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload

%autoreload 2

## Loading Datasets

In [None]:
### Quick variant: load only the first two test and validation datasets.
### Note: the "Loads all datasets" cell rebinds the same names with the full
### lists, so the result of this cell is replaced if that cell is run afterwards.
print('Loading test datasets...')
ds, test_datasets_df = load_openml_list(
    test_dids_classification[:2],
    filter_for_nan=True,
)
# `ds` and `test_datasets` refer to the same object; the evaluation cells use `ds`.
test_datasets = ds

print('\n Loading valid datasets...')
valid_datasets, valid_datasets_df = load_openml_list(
    valid_dids_classification[:2],
    filter_for_nan=True,
)

In [None]:
### Full variant: load every dataset id in the test and validation lists.
print('Loading test datasets...')
ds, test_datasets_df = load_openml_list(
    test_dids_classification,
    filter_for_nan=True,
)
# `ds` and `test_datasets` refer to the same object; the evaluation cells use `ds`.
test_datasets = ds

print('\n Loading valid datasets...')
valid_datasets, valid_datasets_df = load_openml_list(
    valid_dids_classification,
    filter_for_nan=True,
)

## Setting params

In [None]:
# Device string handed to get_model and to every evaluate call below.
device = "cpu"

In [None]:
# Number of samples loaded per dataset. Samples that come after the
# training sequence are the ones used for evaluation.
seq_len = 100

# Number of training samples after which evaluation is performed.
# The pretrained models were not trained to be evaluated after 30 samples,
# so some drop in performance is expected at this position.
eval_positions = [30]

# Upper bound on the number of features; pretrained models have to use 60.
max_features = 60

# Number of subsamples drawn for each dataset.
max_samples = 20

In [None]:
# Checkpoint file and configuration dict for the 'gp' model type.
# Despite the `_dir` suffix, the path points at a single .ckpt file.
gp_model_checkpoint_dir = "../results/tabular_model_gp.ckpt"

# NOTE(review): the sampler entry is stored as strings (apparently the repr of
# the original lambda), not as a callable — confirm how get_model treats it.
gp_model_config = dict(
    batch_size=512,
    bptt=100,
    dropout=0.5,
    emsize=512,
    epochs=100,
    eval_positions=[10, 20, 40, 80],
    lr=6.271726842985807e-05,
    nhead=4,
    nhid_factor=2,
    nlayers=5,
    num_features=60,
    prior_lengthscale=0.00014803074521613278,
    prior_noise=0.001,
    prior_normalize_by_used_features=True,
    prior_num_features_used_sampler={
        'uniform_int_sampler_f(1,max_features)': '<function <lambda>.<locals>.<lambda> at 0x7f21e832e550>',
    },
    prior_order_y=False,
    prior_outputscale=2.3163584733185836,
    prior_type='gp',
)

In [None]:
# Checkpoint file and configuration dict for the 'bnn' model type.
# Despite the `_dir` suffix, the path points at a single .ckpt file.
bnn_model_checkpoint_dir = "../results/tabular_model_bnn.ckpt"

# NOTE(review): the sampler and activation entries are stored as strings
# (apparently reprs of the original objects), not as callables/classes —
# confirm how get_model treats them.
bnn_model_config = dict(
    batch_size=512,
    bptt=50,
    dropout=0.5,
    emsize=512,
    epochs=100,
    eval_positions=[10, 20, 40],
    lr=1.6421403128751275e-05,
    nhead=4,
    nhid_factor=2,
    nlayers=5,
    num_features=60,
    prior_activations="<class 'torch.nn.modules.activation.Tanh'>",
    prior_dropout_sampler={
        'lambda: 0.0': '<function <lambda> at 0x7f613c1364c0>',
    },
    prior_emsize_sampler={
        'scaled_beta_sampler_f(2.0, 4.0, 150, 2)': '<function <lambda>.<locals>.<lambda> at 0x7f613c136310>',
    },
    prior_is_causal=False,
    prior_nlayers_sampler={
        'lambda: 3': '<function <lambda> at 0x7f613c136790>',
    },
    prior_noise_std_gamma_k=1.8663049257557085,
    prior_noise_std_gamma_theta=0.05275478076173361,
    prior_normalize_by_used_features=False,
    prior_num_features_used_sampler={
        'scaled_beta_sampler_f(1.0, 1.6, max_features, 2)': '<function <lambda>.<locals>.<lambda> at 0x7f613c136550>',
    },
    prior_order_y=True,
    prior_sigma_gamma_k=3.6187797729244253,
    prior_sigma_gamma_theta=0.06773738681062867,
    prior_type='mlp',
)

## Loading PFN

In [None]:
# Select which pretrained model to evaluate: 'bnn' is supported, 'gp' is not.
model_type = 'bnn'
if model_type == 'gp':
    # gp_model_config / gp_model_checkpoint_dir are defined above, but this
    # branch is deliberately disabled. The assignments that used to follow the
    # raise were unreachable and have been dropped.
    raise NotImplementedError("Not Implemented")
elif model_type == 'bnn':
    config = bnn_model_config
    checkpoint_dir = bnn_model_checkpoint_dir
else:
    # Fail with a clear message instead of a NameError on `config` below.
    raise ValueError(f"Unknown model_type: {model_type!r} (expected 'gp' or 'bnn')")

# get_model returns an indexable result; the network itself is at index 2.
# Binding `model` straight to that element keeps the name referring to one
# kind of object for the whole notebook.
model = get_model(config, device, eval_positions, should_train=False)[2]

# The checkpoint unpacks into (state_dict, <unused>). map_location remaps the
# stored tensors onto `device`, so a checkpoint saved from GPU tensors can
# still be loaded on a CPU-only machine.
model_state, _ = torch.load(checkpoint_dir, map_location=device)
model.load_state_dict(model_state)

## Evaluation of PFN and Baselines on all datasets

### Transformer

In [None]:
# Evaluate the pretrained PFN on the loaded datasets.
# `device` and `max_samples` come from the parameter cells above. Previously
# this cell re-assigned device = 'cpu' and hard-coded max_samples=20, which
# silently ignored any change made to those parameter cells.
result = evaluate(
    ds, model.to(device), 'transformer',
    max_features=max_features,
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    # Feature rescaling follows the setting stored in the selected model config.
    rescale_features=config["prior_normalize_by_used_features"],
    extend_features=True,
    plot=False,
    overwrite=True,
    save=False,
)
result

### KNN

In [None]:
# Evaluate the KNN baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, knn_metric, 'knn',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result

### Logistic Regression

In [None]:
# Evaluate the logistic-regression baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, logistic_metric, 'logistic',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result

### Gaussian Process

In [None]:
# Evaluate the Gaussian-process baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, gp_metric, 'gp',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result

### XGBoost

In [None]:
# Evaluate the XGBoost baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, xgb_metric, 'xgb',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result

### Bayesian NN

In [None]:
# Evaluate the Bayesian neural network baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, bayes_net_metric, 'bayes_net',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result

### CatBoost

In [None]:
# Evaluate the CatBoost baseline on the loaded datasets.
# max_samples now comes from the parameter cell instead of a hard-coded 20,
# so changing it there applies to every method.
result = evaluate(
    ds, catboost_metric, 'catboost',
    bptt=seq_len,
    eval_position_range=eval_positions,
    device=device,
    max_samples=max_samples,
    overwrite=True,
    save=False,
)
result