In [1]:
# Move the working directory one level up before the DeepMTP imports below.
# NOTE(review): presumably this makes the DeepMTP package importable when the
# notebook lives in a sub-folder of the repo — confirm. Re-running this cell
# without restarting the kernel moves up one more level each time.
import os
os.chdir('../')

# NOTE(review): `arg` is not referenced in any visible cell; this looks like an
# accidental auto-import — confirm and remove.
from ast import arg
from DeepMTP.main import DeepMTP
from DeepMTP.hpo_worker import BaseWorker
from DeepMTP.simple_hyperband import HyperBand
from DeepMTP.dataset import load_process_MLC, load_process_MTR, load_process_DP, process_dummy_MLC, process_dummy_MTR, process_dummy_DP, load_process_MC
from DeepMTP.utils.data_utils import data_process, BaseDataset
from DeepMTP.utils.utils import generate_config
import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

In [2]:
# Hyperparameter search space that is later handed to HyperBand.
cs = CS.ConfigurationSpace()

# The learning rate is the only log-scaled dimension; it is registered first,
# on its own, before the remaining hyperparameters.
lr = CSH.UniformFloatHyperparameter(
    name="learning_rate",
    lower=1e-6,
    upper=1e-3,
    default_value=1e-3,
    log=True,
)
cs.add_hyperparameters([lr])

# Integer-valued dimensions, all sampled uniformly (no log scaling).
embedding_size = CSH.UniformIntegerHyperparameter(
    name="embedding_size", lower=8, upper=2048, default_value=64, log=False
)
instance_branch_layers = CSH.UniformIntegerHyperparameter(
    name="instance_branch_layers", lower=1, upper=2, default_value=1, log=False
)
instance_branch_nodes_per_layer = CSH.UniformIntegerHyperparameter(
    name="instance_branch_nodes_per_layer", lower=8, upper=2048, default_value=64, log=False
)
target_branch_layers = CSH.UniformIntegerHyperparameter(
    name="target_branch_layers", lower=1, upper=2, default_value=1, log=False
)
target_branch_nodes_per_layer = CSH.UniformIntegerHyperparameter(
    name="target_branch_nodes_per_layer", lower=8, upper=2048, default_value=64, log=False
)

# Regularisation-related dimensions; both are made conditional further down.
dropout_rate = CSH.UniformFloatHyperparameter(
    name="dropout_rate", lower=0.0, upper=0.9, default_value=0.4, log=False
)
batch_norm = CSH.CategoricalHyperparameter(name="batch_norm", choices=[True, False])

cs.add_hyperparameters([
    embedding_size,
    instance_branch_layers,
    instance_branch_nodes_per_layer,
    target_branch_layers,
    target_branch_nodes_per_layer,
    dropout_rate,
    batch_norm,
])

# dropout_rate is only active when the instance branch OR the target branch
# has more than one layer.
cond = CS.GreaterThanCondition(dropout_rate, instance_branch_layers, 1)
cond3 = CS.GreaterThanCondition(dropout_rate, target_branch_layers, 1)

# batch_norm follows the same activation rule.
cond2 = CS.GreaterThanCondition(batch_norm, instance_branch_layers, 1)
cond4 = CS.GreaterThanCondition(batch_norm, target_branch_layers, 1)

cs.add_condition(CS.OrConjunction(cond, cond3))
# Left as a bare trailing expression so the notebook displays the returned conjunction.
cs.add_condition(CS.OrConjunction(cond2, cond4))

(batch_norm | instance_branch_layers > 1 || batch_norm | target_branch_layers > 1)

In [3]:
# Fetch the 'yeast' dataset ('undivided' variant) through DeepMTP's MLC loader.
data = load_process_MLC(variant='undivided', dataset_name='yeast')

# Split into train/val/test under validation setting 'B'; data_info carries the
# metadata (input dimensions, detected problem mode) reused by the config cell.
train, val, test, data_info = data_process(
    data,
    verbose=True,
    validation_setting='B',
)

Processing...
yeast:undivided - exists, not redownloading
Done
Interaction file: 2d numpy array format detected
Interaction file: checking format consistency... Passed
Interaction file: checking instance id format consistency... Passed
Interaction file: checking target id type consistency... Passed

Interaction file: checking target variable type consistency... Passed
Automatically detected type of target variable type: binary

-- Test set was not provided, could not detect if novel instances exist or not 
-- Test set was not provided, could not detect if novel targets exist or not 

Instance features file: processing features... Done

Cross input consistency for (numpy) interaction data and instance features checks out
-- Same instance ids in the interaction and features files for the train set

Splitting train to train-test according to validation setting B... Done
Splitting train to train-val according to validation setting B... Done


In [4]:
# Display the metadata returned by data_process; the config cell below reads
# its input dimensions, validation setting and problem mode from here.
data_info

{'detected_validation_setting': 'B',
 'detected_problem_mode': 'classification',
 'instance_branch_input_dim': 103,
 'target_branch_input_dim': 14}

In [5]:
# Base configuration passed to BaseWorker; the HyperBand cell also reads
# 'additional_info' from it.
config = {
    # Root folder for the HPO results.
    'hpo_results_path': './hyperband/',

    # Values derived from the processed dataset (see data_info).
    'instance_branch_input_dim': data_info['instance_branch_input_dim'],
    'target_branch_input_dim': data_info['target_branch_input_dim'],
    'validation_setting': data_info['detected_validation_setting'],
    'general_architecture_version': 'dot_product',
    'problem_mode': data_info['detected_problem_mode'],

    # Device and data-loading settings.
    # NOTE(review): the stored output of `worker.base_config` shows 'cuda:6',
    # so the saved outputs appear to come from an earlier run — confirm.
    'compute_mode': 'cuda:0',
    'train_batchsize': 512,
    'val_batchsize': 512,
    'num_epochs': 6,
    'num_workers': 8,

    # Evaluation metrics and early-stopping patience.
    'metrics': ['hamming_loss', 'auroc'],
    'metrics_average': ['macro'],
    'patience': 10,
    'evaluate_train': True,
    'evaluate_val': True,

    # Logging and model-selection switches.
    'verbose': True,
    'results_verbose': False,
    'use_early_stopping': True,
    'use_tensorboard_logger': True,
    'wandb_project_name': None,
    'wandb_project_entity': None,
    'metric_to_optimize_early_stopping': 'loss',
    'metric_to_optimize_best_epoch_selection': 'loss',

    # Both branches use an MLP.
    'instance_branch_architecture': 'MLP',
    'target_branch_architecture': 'MLP',

    'save_model': True,
    'eval_every_n_epochs': 10,

    # HyperBand settings, read back when the optimizer is constructed.
    'additional_info': {'eta': 3, 'max_budget': 9},
}

In [6]:
# Wrap the data splits, metadata and base configuration in the HPO worker.
# NOTE(review): the trailing 'loss' is presumably the metric the worker reports
# back to the optimizer — confirm against BaseWorker's signature.
worker = BaseWorker(train, val, test, data_info, config, 'loss')

In [7]:
# Display the configuration as stored on the worker, for comparison with the
# `config` dict passed in above.
worker.base_config

{'hpo_results_path': './hyperband/',
 'instance_branch_input_dim': 103,
 'target_branch_input_dim': 14,
 'validation_setting': 'B',
 'enable_dot_product_version': True,
 'problem_mode': 'classification',
 'compute_mode': 'cuda:6',
 'train_batchsize': 512,
 'val_batchsize': 512,
 'num_epochs': 6,
 'num_workers': 8,
 'metrics': ['hamming_loss', 'auroc'],
 'metrics_average': ['macro'],
 'patience': 10,
 'evaluate_train': True,
 'evaluate_val': True,
 'verbose': True,
 'results_verbose': False,
 'use_early_stopping': True,
 'use_tensorboard_logger': True,
 'wandb_project_name': None,
 'wandb_project_entity': None,
 'metric_to_optimize_early_stopping': 'loss',
 'metric_to_optimize_best_epoch_selection': 'loss',
 'instance_branch_architecture': 'MLP',
 'target_branch_architecture': 'MLP',
 'save_model': True,
 'eval_every_n_epochs': 10,
 'running_hpo': True,
 'additional_info': {'eta': 3, 'max_budget': 9}}

In [8]:
# HyperBand settings are kept inside the base config under 'additional_info'.
hyperband_settings = config['additional_info']

# Build the optimizer over the search space `cs`; direction="min" together with
# the worker's 'loss' argument means lower values are treated as better.
hb = HyperBand(
    base_worker=worker,
    configspace=cs,
    eta=hyperband_settings['eta'],
    max_budget=hyperband_settings['max_budget'],
    direction="min",
    verbose=True,
)

These are the pre-calculate brackets and successive halving runs:
{9: {'n_i': [9, 3, 1], 'r_i': [1, 3, 9], 'num_iters': 3}, 3: {'n_i': [3], 'r_i': [9], 'num_iters': 1}}


In [9]:
# Run the HyperBand search. The returned object exposes an `.info` mapping whose
# 'config' and 'model_dir' entries are used below to reload the best model.
best_overall_config = hb.run_optimizer()

-- Running bracket with starting budget: 9
---- Evaluating configuration... 
Selected device: cuda:6
Starting training...
Epoch:0... Done
  Validating... Calculating val performance... Done
Done
Starting testing... Calculating test performance... Done
Done
+-------+--------+--------+--------------------+-------------+
|  mode | #epoch |  loss  | hamming_loss_macro | auroc_macro |
+-------+--------+--------+--------------------+-------------+
| train |   0    | 0.8804 |       0.6955       |    0.5131   |
+-------+--------+--------+--------------------+-------------+
+------+--------+--------+--------------------+-------------+----------------+
| mode | #epoch |  loss  | hamming_loss_macro | auroc_macro | early_stopping |
+------+--------+--------+--------------------+-------------+----------------+
| val  |   0    | 0.8437 |       0.7025       |    0.5247   |      0/10      |
+------+--------+--------+--------------------+-------------+----------------+
+------+--------+------+---------

# Load the best model and generate predictions for the test set

In [10]:
# Rebuild the winning model from its stored configuration and checkpoint folder.
best_run_info = best_overall_config.info
best_model = DeepMTP(
    best_run_info['config'],
    checkpoint_dir=best_run_info['model_dir'],
)

Loading checkpoint from ./hyperband/20_06_2022__12_53_33//20_06_2022__12_57_42/model.pt...  
Done
Selected device: cuda:1
Applying saved weights... Done


In [11]:
# Evaluate the reloaded best model on the held-out test split.
best_model_results = best_model.predict(test, verbose=True)

Calculating  performance... Done


In [12]:
# Display the test results; the output starts with a dict of per-target
# hamming_loss and auroc values.
best_model_results

({'hamming_loss_target_0': 0.2231404958677686,
  'auroc_target_0': 0.7904002720111827,
  'hamming_loss_target_1': 0.40082644628099173,
  'auroc_target_1': 0.6637501519018105,
  'hamming_loss_target_2': 0.2706611570247934,
  'auroc_target_2': 0.7880881519274378,
  'hamming_loss_target_3': 0.25413223140495866,
  'auroc_target_3': 0.8078022532369262,
  'hamming_loss_target_4': 0.22520661157024793,
  'auroc_target_4': 0.7366217320261437,
  'hamming_loss_target_5': 0.25413223140495866,
  'auroc_target_5': 0.6755993736077108,
  'hamming_loss_target_6': 0.15702479338842976,
  'auroc_target_6': 0.680437306501548,
  'hamming_loss_target_7': 0.17148760330578514,
  'auroc_target_7': 0.6357599975963706,
  'hamming_loss_target_8': 0.06198347107438017,
  'auroc_target_8': 0.5914831130690161,
  'hamming_loss_target_9': 0.11363636363636363,
  'auroc_target_9': 0.6189023098114007,
  'hamming_loss_target_10': 0.128099173553719,
  'auroc_target_10': 0.5897034092646385,
  'hamming_loss_target_11': 0.32231