In [15]:
import nn_utils
import builders
import importlib

from ray import tune
import optuna
from ray.tune.suggest.optuna import OptunaSearch
import torch

from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import ExperimentAnalysis
from ray.tune import register_trainable

import inspect
import argparse
import skorch
import os

from torch.utils import tensorboard
from sklearn import metrics

In [16]:
# Dataset names to evaluate; each name is also interpolated into the config
# module path and the checkpoint directory inside the evaluation loop.
DATASETS = ["sylvine", "anneal", "adult", "jasmine", "nomao"]
# Aggregator variants evaluated for every dataset.
AGGREGATORS = ["cls", "concatenate", "rnn", "sum", "mean", "max"]
# Batch size given to the model (assigned again, to the same value, inside the evaluation loop).
BATCH_SIZE = 128
# Seed passed to the dataset loader (assigned again, to the same value, inside the evaluation loop).
SEED = 11

In [17]:
# Collected test metrics, filled as results[dataset][aggregator]["balanced_accuracy"].
results = {}

In [18]:
# Evaluate the saved best model of every (dataset, aggregator) combination.
for dataset_ in DATASETS:
    for aggregator_str_ in AGGREGATORS:
        
        # Working copies: `dataset` is rebound to a DatasetConfig instance further down,
        # while `dataset_` / `aggregator_str_` keep the plain strings used as result keys.
        dataset = dataset_
        aggregator_str = aggregator_str_
       
       
        print(f"Using -- Dataset:{dataset} Aggregator:{aggregator_str}")

        #####################################################
        # Configuration
        #####################################################

        # Importable config module of this combination, e.g. "sylvine.cls.config".
        MODULE = f"{dataset}.{aggregator_str}.config"
        # Directory holding the saved parameter search and the best-model checkpoint.
        CHECKPOINT_DIR = f"./{dataset}/{aggregator_str}/checkpoint"
        SEED = 11
        # NOTE(review): N_SAMPLES, EARLY_STOPPING and MAX_CHECKPOINTS are not read
        # anywhere in the visible notebook -- presumably leftovers from the training script.
        N_SAMPLES = 30

        BATCH_SIZE = 128
        MAX_EPOCHS = 1000
        EARLY_STOPPING = 30
        MAX_CHECKPOINTS = 10
        # Reset on every iteration; switched to True below when the dataset has more than two labels.
        multiclass = False

        #####################################################
        # Util functions
        #####################################################

        def get_class_from_type(module, class_type):
            """Return the first strict subclass of ``class_type`` exposed by ``module``.

            Attributes are visited in ``dir(module)`` order (sorted by name), so
            when several subclasses are present the first attribute name wins.

            Args:
                module: Object whose attributes are scanned (normally an imported module).
                class_type: Base class the returned class must derive from.

            Returns:
                The matching class, or ``None`` when ``module`` exposes no subclass.
            """
            for attr in dir(module):
                clazz = getattr(module, attr)
                # Exclude only the base class itself. An identity check is used
                # because comparing str() representations would also reject a
                # genuine subclass that happens to share the base's qualified name.
                if inspect.isclass(clazz) and issubclass(clazz, class_type) and clazz is not class_type:
                    return clazz

            return None

        def get_params_startswith(params, prefix):
            """Pop every entry of ``params`` whose key starts with ``prefix``.

            Matching entries are removed from ``params`` in place and returned in
            a new dict, keyed by the original key with the leading ``prefix``
            stripped.

            Args:
                params: Mutable mapping with string keys; it is modified in place.
                prefix: Key prefix selecting the entries to extract.

            Returns:
                Dict of the extracted entries (empty when nothing matches).
            """
            # Snapshot the matching keys first: popping while iterating the dict would fail.
            matching = [k for k in params if k.startswith(prefix)]

            # Slice off the leading prefix only; str.replace would also delete
            # any later occurrence of the prefix inside the key.
            return {k[len(prefix):]: params.pop(k) for k in matching}


        def trainable(config, checkpoint_dir=CHECKPOINT_DIR):
            """Rebuild the transformer model for ``config`` and load its saved best weights.

            Nothing is trained here: the model is constructed and its parameters
            are loaded from the ``best_model`` checkpoint under ``CHECKPOINT_DIR``.
            ``transformer_config``, ``criterion``, ``n_labels``, ``train_indices``
            and ``val_indices`` are read from the enclosing notebook scope when
            the function is called.

            Args:
                config: Hyperparameter mapping. Must contain ``"embedding_size"``.
                    Keys prefixed with ``encoders__``, ``aggregator__`` or
                    ``preprocessor__`` are routed (prefix stripped) to the
                    matching builder; the remaining keys are passed to every
                    builder and to the model. The mapping is not modified.
                checkpoint_dir: Not used by the body, which reads the
                    ``CHECKPOINT_DIR`` variable of the enclosing scope instead.
                    NOTE(review): presumably kept to match the signature Ray
                    Tune expects from function trainables -- confirm.

            Returns:
                The model with parameters loaded from the checkpoint, or ``None``
                when the ``best_model/.fitted`` marker file does not exist.
            """
            # Work on a shallow copy so the pops below do not strip keys from the caller's dict.
            config = dict(config)
            embedding_size = config.pop("embedding_size")

            encoders_params = get_params_startswith(config, "encoders__")
            aggregator_params = get_params_startswith(config, "aggregator__")
            preprocessor_params = get_params_startswith(config, "preprocessor__")

            # From here on `config` holds only the un-prefixed hyperparameters.
            model_params = {
                **config,
                "encoders": transformer_config.get_encoders(embedding_size, **{**config, **encoders_params}),
                "aggregator": transformer_config.get_aggregator(embedding_size, **{**config, **aggregator_params}),
                "preprocessor": transformer_config.get_preprocessor(**{**config, **preprocessor_params}),
                "optimizer": torch.optim.SGD,
                "criterion": criterion,
                "device": "cuda" if torch.cuda.is_available() else "cpu",
                "batch_size": BATCH_SIZE,
                "max_epochs": MAX_EPOCHS,
                "n_output": n_labels, # The number of output neurons
                "need_weights": False,
                "verbose": 1

            }

            # Without the marker file there are no saved weights to load.
            if not os.path.exists(os.path.join(CHECKPOINT_DIR, "best_model/.fitted")):
                print("Not fitted before! I'm not going to do anything")
                return None

            checkpoint = skorch.callbacks.Checkpoint(monitor="balanced_accuracy_best", dirname=os.path.join(CHECKPOINT_DIR, "best_model"))

            # NOTE(review): the empty list is the third positional argument --
            # presumably the test indices; confirm against nn_utils.build_transformer_model.
            model = nn_utils.build_transformer_model(
                        train_indices,
                        val_indices, 
                        [],
                        **model_params
                        )
            model.load_params(checkpoint=checkpoint)
            return model
        

            

        #####################################################
        # Dataset and components
        #####################################################

        # Import the config module of this combination and instantiate the three
        # configuration classes it is expected to define; a missing class aborts the run.
        module = importlib.import_module(MODULE)

        dataset = get_class_from_type(module, builders.DatasetConfig)
        if dataset is None:
            raise ValueError("Dataset configuration not found")
        dataset = dataset()

        transformer_config = get_class_from_type(module, builders.TransformerConfig)
        if transformer_config is None:
            raise ValueError("Transformer configuration not found")
        transformer_config = transformer_config()

        search_space_config = get_class_from_type(module, builders.SearchSpaceConfig)
        if search_space_config is None:
            raise ValueError("Search space configuration not found")
        search_space_config = search_space_config()

        #####################################################
        # Configure dataset
        #####################################################

        # Call download() only when the dataset reports that it does not exist yet,
        # then load it with the configured seed.
        if not dataset.exists():
            dataset.download()
        dataset.load(seed=SEED)

        # Default preprocessing pipeline built from the dataset's column lists.
        preprocessor = nn_utils.get_default_preprocessing_pipeline(
            dataset.get_categorical_columns(),
            dataset.get_numerical_columns(),
        )

        #####################################################
        # Data preparation
        #####################################################

        # Splits as returned by the dataset object.
        train_features, train_labels = dataset.get_train_data()
        val_features, val_labels = dataset.get_val_data()
        test_features, test_labels = dataset.get_test_data()

        # Fit the preprocessor on the training split only, then transform every split with it.
        preprocessor = preprocessor.fit(train_features, train_labels)
        train_features, val_features, test_features = (
            preprocessor.transform(split)
            for split in (train_features, val_features, test_features)
        )

        # Join train and validation data; the first two entries of `indices`
        # are used as the train and validation indices.
        all_features, all_labels, indices = nn_utils.join_data(
            [train_features, val_features],
            [train_labels, val_labels],
        )
        train_indices = indices[0]
        val_indices = indices[1]

        if dataset.get_n_labels() > 2:
            # More than two labels: one output per label with cross-entropy loss.
            n_labels = dataset.get_n_labels()
            multiclass = True
            criterion = torch.nn.CrossEntropyLoss
        else:
            # Two labels or fewer: a single output with BCE-with-logits loss.
            n_labels = 1
            criterion = torch.nn.BCEWithLogitsLoss

        #####################################################
        # Hyperparameter search
        #####################################################
        
        # Register the loader function with Ray Tune under the name "trainable".
        # NOTE(review): presumably required so the saved experiment state, which
        # refers to that name, can be read back -- confirm.
        register_trainable("trainable", trainable)

        try:
            # Read the finished search from disk instead of re-running it.
            # To resume the search itself, use:
            #   tune.run(trainable, resume="AUTO", local_dir=CHECKPOINT_DIR, name="param_search")
            analysis = ExperimentAnalysis(os.path.join(CHECKPOINT_DIR, "param_search"))
            best_config = analysis.get_best_config(metric="balanced_accuracy", mode="max")

            model = trainable(best_config)

            # trainable() returns None (after printing its own message) when no
            # fitted checkpoint exists; there is nothing to score in that case.
            if model is not None:
                y_pred = model.predict(test_features)
                score = metrics.balanced_accuracy_score(test_labels, y_pred)
                results.setdefault(dataset_, {}).setdefault(aggregator_str_, {})["balanced_accuracy"] = score

        except Exception as e:
            # Best effort: keep evaluating the remaining combinations, but report
            # the failure so a missing entry in `results` can be explained.
            print(f"Failed -- Dataset:{dataset_} Aggregator:{aggregator_str_}: {type(e).__name__}: {e}")
        
        


Using -- Dataset:sylvine Aggregator:cls
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20']
Categorical columns: []
Columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'class']
Using -- Dataset:sylvine Aggregator:concatenate
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20']
Categorical columns: []
Columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'class']
Using -- Dataset:sylvine Aggregator:rnn
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18'


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1297383943918189 and num_layers=1



Using -- Dataset:sylvine Aggregator:sum
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20']
Categorical columns: []
Columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'class']
Using -- Dataset:sylvine Aggregator:mean
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20']
Categorical columns: []
Columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'class']
Using -- Dataset:sylvine Aggregator:max
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19'


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.2078449539940142 and num_layers=1



Using -- Dataset:anneal Aggregator:sum
Target mapping: {'3': 0, 'U': 1, '1': 2, '5': 3, '2': 4}
Numerical columns: ['carbon', 'hardness', 'strength']
Categorical columns: ['family', 'product-type', 'steel', 'temper_rolling', 'condition', 'formability', 'non-ageing', 'surface-finish', 'surface-quality', 'enamelability', 'bc', 'bf', 'bt', 'bw%2Fme', 'bl', 'chrom', 'phos', 'cbond', 'exptl', 'ferro', 'blue%2Fbright%2Fvarn%2Fclean', 'lustre', 'shape', 'thick', 'width', 'len', 'oil', 'bore', 'packing']
Columns: ['family', 'product-type', 'steel', 'carbon', 'hardness', 'temper_rolling', 'condition', 'formability', 'strength', 'non-ageing', 'surface-finish', 'surface-quality', 'enamelability', 'bc', 'bf', 'bt', 'bw%2Fme', 'bl', 'chrom', 'phos', 'cbond', 'exptl', 'ferro', 'blue%2Fbright%2Fvarn%2Fclean', 'lustre', 'shape', 'thick', 'width', 'len', 'oil', 'bore', 'packing', 'class']
Using -- Dataset:anneal Aggregator:mean
Target mapping: {'3': 0, 'U': 1, '1': 2, '5': 3, '2': 4}
Numerical columns:


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.44679838969593894 and num_layers=1



Using -- Dataset:adult Aggregator:sum
Target mapping: {'<=50K': 0, '>50K': 1}
Numerical columns: ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
Categorical columns: ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
Columns: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'class']
Using -- Dataset:adult Aggregator:mean
Target mapping: {'<=50K': 0, '>50K': 1}
Numerical columns: ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
Categorical columns: ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']
Columns: ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'h


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.4144357911972334 and num_layers=1



Using -- Dataset:jasmine Aggregator:sum
Target mapping: {1: 0, 0: 1}
Numerical columns: ['V13', 'V23', 'V43', 'V45', 'V56', 'V59', 'V126', 'V131']
Categorical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V24', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V31', 'V32', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V39', 'V40', 'V41', 'V42', 'V44', 'V46', 'V47', 'V48', 'V49', 'V50', 'V51', 'V52', 'V53', 'V54', 'V55', 'V57', 'V58', 'V60', 'V61', 'V62', 'V63', 'V64', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V71', 'V72', 'V73', 'V74', 'V75', 'V76', 'V77', 'V78', 'V79', 'V80', 'V81', 'V82', 'V83', 'V84', 'V85', 'V86', 'V87', 'V88', 'V89', 'V90', 'V91', 'V92', 'V93', 'V94', 'V95', 'V96', 'V97', 'V98', 'V99', 'V100', 'V101', 'V102', 'V103', 'V104', 'V105', 'V106', 'V107', 'V108', 'V109', 'V110', 'V111', 'V112', 'V113', 'V114', 'V115', 'V116', 'V117', 'V118', 'V119', 'V120', 'V121', 'V122', 'V123


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.40930011418351897 and num_layers=1



Using -- Dataset:nomao Aggregator:sum
Target mapping: {2: 0, 1: 1}
Numerical columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V17', 'V18', 'V19', 'V20', 'V21', 'V22', 'V25', 'V26', 'V27', 'V28', 'V29', 'V30', 'V33', 'V34', 'V35', 'V36', 'V37', 'V38', 'V41', 'V42', 'V43', 'V44', 'V45', 'V46', 'V49', 'V50', 'V51', 'V52', 'V53', 'V54', 'V57', 'V58', 'V59', 'V60', 'V61', 'V62', 'V65', 'V66', 'V67', 'V68', 'V69', 'V70', 'V73', 'V74', 'V75', 'V76', 'V77', 'V78', 'V81', 'V82', 'V83', 'V84', 'V85', 'V86', 'V89', 'V90', 'V91', 'V93', 'V94', 'V95', 'V97', 'V98', 'V99', 'V101', 'V102', 'V103', 'V105', 'V106', 'V107', 'V109', 'V110', 'V111', 'V113', 'V114', 'V115', 'V117', 'V118']
Categorical columns: ['V7', 'V8', 'V15', 'V16', 'V23', 'V24', 'V31', 'V32', 'V39', 'V40', 'V47', 'V48', 'V55', 'V56', 'V63', 'V64', 'V71', 'V72', 'V79', 'V80', 'V87', 'V88', 'V92', 'V96', 'V100', 'V104', 'V108', 'V112', 'V116']
Columns: ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8

In [19]:
# Display the collected test balanced-accuracy scores per dataset and aggregator.
results

{'sylvine': {'cls': {'balanced_accuracy': 0.9339773453914315},
  'concatenate': {'balanced_accuracy': 0.9144514478852654},
  'rnn': {'balanced_accuracy': 0.9392756006211747},
  'sum': {'balanced_accuracy': 0.909906823787339},
  'mean': {'balanced_accuracy': 0.8983511464328127},
  'max': {'balanced_accuracy': 0.8939663834840595}},
 'anneal': {'cls': {'balanced_accuracy': 0.23333333333333334},
  'rnn': {'balanced_accuracy': 0.22629107981220656},
  'sum': {'balanced_accuracy': 0.35014084507042254},
  'mean': {'balanced_accuracy': 0.2},
  'max': {'balanced_accuracy': 0.2}},
 'adult': {'cls': {'balanced_accuracy': 0.7599713766387479},
  'concatenate': {'balanced_accuracy': 0.5271654836265733},
  'rnn': {'balanced_accuracy': 0.5},
  'sum': {'balanced_accuracy': 0.7328058930121745},
  'mean': {'balanced_accuracy': 0.7600741503920267},
  'max': {'balanced_accuracy': 0.5271342450507179}},
 'jasmine': {'concatenate': {'balanced_accuracy': 0.770979020979021},
  'rnn': {'balanced_accuracy': 0.7636

In [20]:

import pickle

from optuna.visualization import plot_optimization_history

# Searcher state file written during the adult/concatenate parameter search.
SEARCHER_STATE_PATH = "adult/concatenate/checkpoint/param_search/searcher-state-2022-04-28_04-26-37.pkl"

# SECURITY: pickle.load can execute arbitrary code; only open state files
# produced by your own runs.
with open(SEARCHER_STATE_PATH, "rb") as f:
    obj = pickle.load(f)

# NOTE(review): obj[2] is presumably the Optuna study stored in the searcher
# state (it is handed to Optuna's plotting helper) -- confirm.
plot_optimization_history(obj[2])

In [21]:
# Best hyperparameters recorded by the object at obj[2] of the unpickled
# searcher state (presumably the Optuna study -- confirm).
obj[2].best_params

{'n_layers': 3,
 'optimizer__lr': 0.006476869642881411,
 'n_head': 16,
 'n_hid': 64,
 'dropout': 0.3614228002379136,
 'embedding_size': 512,
 'numerical_passthrough': False}