In [1]:
%load_ext tensorboard

In [1]:
import os
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [12]:
import hyperopt
from hyperopt import hp
from hyperopt.pyll.stochastic import sample
import numpy as np
import pandas as pd
import pytorch_lightning as tl
import matplotlib.pyplot as plt
import scipy as sp
import torch
from torch_geometric.data import LightningDataset

from src.models import GNNLayerType, RegressionLayerType, ActivationFunction, PoolingFunction, ModelArchitecture, GNNArchitecture, build_uniform_gnn_architecture, construct_gnn, construct_mlp
from src.config import DEFAULT_N_FEATURES, LOG_DIR, DATA_DIR, RANDOM_SEEDS
from src.training import train_model
from src.reporting import generate_experiment_dir
from src.parameters import HyperParameters
from src.data import HTSDataset, split_dataset, DatasetUsage, partition_dataset, MFPCBA
from src.metrics import DEFAULT_METRICS

In [5]:
dataset = HTSDataset('AID1445', DatasetUsage.DROnly)

In [14]:
params = HyperParameters(
    random_seed=1424,
    use_sd_readouts=False,
    dataset_split=MFPCBA(RANDOM_SEEDS['AID1445']),
    test_split=0.2,
    train_val_split=0.75,
    batch_size=32,
    early_stop_patience=30,
    early_stop_min_delta=0.01,
    lr=0.0001,
    max_epochs=5,
    num_workers=0,
)

In [20]:
for train, val, test in partition_dataset(dataset, params):
    

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [9]:
DEFAULT_METRICS

MetricCollection(
  (MeanAbsoluteError): MeanAbsoluteError()
  (RootMeanSquaredError): RootMeanSquaredError()
  (MaxError): MaxError()
  (PearsonCorrCoefSquared): PearsonCorrCoefSquared()
  (R2Score): R2Score()
)

## Extension 2 - HyperOpt

In [12]:
def create_gnn_architecture(space):
    layers = space['layers']
    regression_architecture = ModelArchitecture(
        layer_types=[RegressionLayerType.Linear],
        features=[int(layers['hidden_features']), 1],
        activation_funcs=[None],
        batch_normalise=[False]
    )
    return GNNArchitecture(
        layer_types=layers['layer_types'],
        features=[DEFAULT_N_FEATURES] + [int(layers['hidden_features'])] * layers['num'],
        activation_funcs=layers['activation_funcs'],
        batch_normalise=[space['batch_normalise']] * layers['num'],
        pool_func=space['pool_func'],
        regression_layer=regression_architecture
    )

In [4]:
def prepare_objective(dataset_name, params, experiment_dir):
    dataset_dir = DATA_DIR / dataset_name
    dataset = HTSDataset(dataset_dir, 'DR')
    test_dataset, training_dataset = split_dataset(dataset, params.test_split)
    train_dataset, val_dataset = split_dataset(dataset, params.train_val_split)
    datamodule = LightningDataset(train_dataset, val_dataset, test_dataset, batch_size=params.batch_size, num_workers=NUM_WORKERS)
    
    def objective(x):
        run_dir = experiment_dir / f'version_{objective.version}'
        architecture = create_gnn_architecture(x)
        print(architecture.layer_types)
        print(type(architecture.layer_types))
        result = train_model(architecture, params, datamodule, run_dir)
        return {'loss': result['RootMeanSquaredError'], 'status': hyperopt.STATUS_OK}
    
    objective.version = 0
    return objective

                
def run_hyperopt(dataset_name, search_space, params, max_evals, experiment_name):
    experiment_dir = LOG_DIR / generate_experiment_dir(dataset_name, params.use_sd_readouts, 'hyperopt' + experiment_name)
    objective = prepare_objective(dataset_name, params, experiment_dir)
    best = hyperopt.fmin(
        fn=objective,
        space=search_space,
        algo=hyperopt.tpe.suggest,
        max_evals=max_evals
    )
    print(hyperopt.space_eval(search_space, best))
    

In [13]:
simple_search_space = {
    'pool_func': hp.choice('pool_func', PoolingFunction),
    'batch_normalise': hp.choice('batch_normalise', [True, False]),
    'layers': hp.choice('layers', [
        {
            'num': i,
            'layer_types': [hp.choice(f'type{i}{j}', [GNNLayerType.GCN, GNNLayerType.GAT]) for j in range(i)],
            'hidden_features': hp.quniform(f'features{i}', 16, 256, 8),
            'activation_funcs': [hp.choice(f'activation{i}{j}', ActivationFunction) for j in range(i)],
        }
        for i in range(2, 3)
    ]),
    
}

params = HyperParameters(
    random_seed=1424,
    use_sd_readouts=False,
    k_folds=1,
    test_split=0.2,
    train_val_split=0.75,
    batch_size=32,
    early_stop_patience=30,
    early_stop_min_delta=0.01,
    lr=0.0001,
    max_epochs=5
)

run_hyperopt('AID1445', simple_search_space, params, 10, 'simple2layer')

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
  0%|                                                                                                                                                                                           | 0/10 [00:00<?, ?trial/s, best loss=?]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_7970c8 | 11.5 K
1 | regression_mlp | Sequential        | 65    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
11.5 K    Trainable params
0         Non-trainable params
11.5 K    Total params
0.046     Total estimated model params size (MB)
  rank_zero_warn(

  rank_zero_warn(

`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_1\checkpoints\epoch=04-loss/val=0.79.c

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 10%|████████████████                                                                                                                                                 | 1/10 [00:05<00:45,  5.11s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_7c7aac | 7.8 K 
1 | regression_mlp | Sequential        | 49    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
7.9 K     Trainable params
0         Non-trainable params
7.9 K     Total params
0.031     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_2\checkpoints\epoch=04-loss/val=3.74.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVIC

(<GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>, <GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 20%|████████████████████████████████▏                                                                                                                                | 2/10 [00:09<00:36,  4.58s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_7efbe7 | 13.8 K
1 | regression_mlp | Sequential        | 73    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
13.8 K    Trainable params
0         Non-trainable params
13.8 K    Total params
0.055     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_3\checkpoints\epoch=03-loss/val=0.76.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVIC

(<GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 30%|████████████████████████████████████████████████▎                                                                                                                | 3/10 [00:14<00:35,  5.02s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_824c48 | 96.5 K
1 | regression_mlp | Sequential        | 257   
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
96.8 K    Trainable params
0         Non-trainable params
96.8 K    Total params
0.387     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_4\checkpoints\epoch=03-loss/val=19.26.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVI

(<GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>, <GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 40%|████████████████████████████████████████████████████████████████▍                                                                                                | 4/10 [00:22<00:35,  5.96s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_86bbb3 | 4.8 K 
1 | regression_mlp | Sequential        | 33    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
4.9 K     Trainable params
0         Non-trainable params
4.9 K     Total params
0.019     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_5\checkpoints\epoch=04-loss/val=20.00.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVI

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 50%|████████████████████████████████████████████████████████████████████████████████▌                                                                                | 5/10 [00:27<00:28,  5.78s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_89f5f2 | 63.0 K
1 | regression_mlp | Sequential        | 201   
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
63.2 K    Trainable params
0         Non-trainable params
63.2 K    Total params
0.253     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_6\checkpoints\epoch=04-loss/val=12.46.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVI

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 60%|████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                | 6/10 [00:33<00:23,  5.75s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_8d5aa3 | 34.4 K
1 | regression_mlp | Sequential        | 137   
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
34.5 K    Trainable params
0         Non-trainable params
34.5 K    Total params
0.138     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_7\checkpoints\epoch=04-loss/val=16.10.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVI

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                | 7/10 [00:38<00:16,  5.59s/trial, best loss: 0.9604218006134033]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_907cdf | 75.9 K
1 | regression_mlp | Sequential        | 225   
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
76.2 K    Trainable params
0         Non-trainable params
76.2 K    Total params
0.305     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_8\checkpoints\epoch=04-loss/val=0.80.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVIC

(<GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 80%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 8/10 [00:44<00:11,  5.61s/trial, best loss: 0.9565171003341675]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_93dcf4 | 18.0 K
1 | regression_mlp | Sequential        | 89    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
18.1 K    Trainable params
0         Non-trainable params
18.1 K    Total params
0.073     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_9\checkpoints\epoch=04-loss/val=13.01.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVI

(<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GAT: <class 'torch_geometric.nn.conv.gat_conv.GATConv'>>)                                                                                       
<class 'tuple'>                                                                                                                                                                                                                        
 90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                | 9/10 [00:49<00:05,  5.31s/trial, best loss: 0.9565171003341675]

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type              | Params
-----------------------------------------------------
0 | gnn            | Sequential_96a30c | 6.3 K 
1 | regression_mlp | Sequential        | 41    
2 | loss           | MSELoss           | 0     
3 | val_metrics    | MetricCollection  | 0     
4 | test_metrics   | MetricCollection  | 0     
-----------------------------------------------------
6.3 K     Trainable params
0         Non-trainable params
6.3 K     Total params
0.025     Total estimated model params size (MB)
`Trainer.fit` stopped: `max_epochs=5` reached.
Restoring states from the checkpoint path at C:\Users\MrMil\dev\automl-drug-discovery\logs\AID1445\DR\hyperoptsimple2layer\version_0\lightning_logs\version_10\checkpoints\epoch=04-loss/val=20.24.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEV

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:53<00:00,  5.35s/trial, best loss: 0.9565171003341675]
{'batch_normalise': False, 'layers': {'activation_funcs': ('ReLU', 'ReLU'), 'hidden_features': 224.0, 'layer_types': (<GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>, <GNNLayerType.GCN: <class 'torch_geometric.nn.conv.gcn_conv.GCNConv'>>), 'num': 2}, 'pool_func': <PoolingFunction.ADD: functools.partial(<function global_add_pool at 0x0000013C8ABF6C20>)>}
