*this notebook was run in Google Colab*

# setup

In [None]:
# mount notebook
from google.colab import drive

mount='/content/gdrive'
print("Colab: mounting Google drive on ", mount)

drive.mount(mount)

# switch to the directory on the Google Drive that you want to use
import os
drive_root = mount + "/My Drive/Colab Notebooks/thesis_training_models"
  
# create drive_root if it doesn't exist
create_drive_root = True
if create_drive_root:
    print("\nColab: making sure ", drive_root, " exists.")
    os.makedirs(drive_root, exist_ok=True)

# change to the directory
print("\nColab: Changing directory to ", drive_root)
%cd $drive_root

Colab: mounting Google drive on  /content/gdrive


In [None]:
# check computational resources: GPU
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

In [None]:
# Report the total RAM of the runtime and whether it reaches the 20 GB mark
from psutil import virtual_memory

ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

print('You are using a high-RAM runtime!' if ram_gb >= 20 else 'Not using a high-RAM runtime')

In [None]:
# installations
# NOTE(review): none of the packages below is version-pinned, so a fresh runtime
# may resolve to different releases than the ones the results were produced with.

# ogb: provides the ogbg-molhiv dataset and its evaluator
! pip install ogb

# dgl: graph library; the wheel name targets CUDA 11.3 builds
! pip install dgl-cu113 dglgo -f https://data.dgl.ai/wheels/repo.html

# mydgllife: model zoo installed from the `python` subdirectory of the git repository
! pip install git+https://github.com/jacobumland/my-dgl-lifesci.git#subdirectory=python

# rdkit
! pip install rdkit 

In [None]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import dgl
import time
from ogb.graphproppred import DglGraphPropPredDataset, collate_dgl
from torch.utils.data import DataLoader
from ogb.graphproppred import Evaluator
from mydgllife.model.model_zoo import * # import all models from model zoo
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from dgl import backend as F
from os.path import exists as file_exists
import pickle
import numpy as np
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_curve, auc
import pandas as pd
import re

In [None]:
# Compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# helper functions

In [None]:
def train_epoch(model, device, data_loader, opt, loss_fn):
    """Run one optimisation pass over ``data_loader`` and return the mean batch loss.

    Which features are fed to the model is decided by the notebook-level
    ``model_name``: node features only for "gcn"/"gat"/"nfp", node and edge
    features for "attfp"/"pagt"/"weave"/"mpnn".

    Args:
        model: network called as ``model(g, node_feats)`` or
            ``model(g, node_feats, edge_feats)``.
        device: device the batched graph and the labels are moved to.
        data_loader: iterable yielding ``(graph, labels)`` batches.
        opt: optimizer; stepped once per batch.
        loss_fn: callable ``loss_fn(logits, labels)`` returning a scalar loss.

    Returns:
        The average of the per-batch loss values of this epoch (a float).

    Raises:
        ValueError: if ``model_name`` is not a supported model, or if
            ``data_loader`` yields no batch at all.
    """
    node_only_models = ("gcn", "gat", "nfp")
    node_edge_models = ("attfp", "pagt", "weave", "mpnn")
    # Fail with a clear message instead of hitting an unbound `logits` later on
    if model_name not in node_only_models + node_edge_models:
        raise ValueError(f"unsupported model_name {model_name!r}")
    uses_edge_feats = model_name in node_edge_models

    model.train()
    train_loss = []
    for g, labels in data_loader:
        g = g.to(device)
        labels = labels.to(torch.float32).to(device)

        node_feats = g.ndata['feat'].float()
        if uses_edge_feats:
            logits = model(g, node_feats, g.edata['feat'].float())
        else:
            logits = model(g, node_feats)

        loss = loss_fn(logits, labels)
        train_loss.append(loss.item())

        opt.zero_grad()
        loss.backward()
        opt.step()

    if not train_loss:
        raise ValueError("data_loader yielded no batches")
    return sum(train_loss) / len(train_loss)  # average loss of the epoch

def eval_epoch(model, device, data_loader, evaluator):
    """Score ``model`` on ``data_loader`` and return the evaluator's ROC-AUC.

    The forward pass runs in eval mode and under ``torch.no_grad()`` so that
    no autograd graph is built during inference. As in training, the
    notebook-level ``model_name`` decides whether edge features are passed.

    Args:
        model: network called as ``model(g, node_feats)`` or
            ``model(g, node_feats, edge_feats)``.
        device: device the batched graph is moved to.
        data_loader: iterable yielding ``(graph, labels)`` batches.
        evaluator: object whose ``eval({'y_true': ..., 'y_pred': ...})``
            returns a mapping with a ``'rocauc'`` entry.

    Returns:
        The value stored under ``'rocauc'`` in the evaluator's result.

    Raises:
        ValueError: if ``model_name`` is not a supported model.
    """
    node_only_models = ("gcn", "gat", "nfp")
    node_edge_models = ("attfp", "pagt", "weave", "mpnn")
    if model_name not in node_only_models + node_edge_models:
        raise ValueError(f"unsupported model_name {model_name!r}")
    uses_edge_feats = model_name in node_edge_models

    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for g, labels in data_loader:
            g = g.to(device)
            node_feats = g.ndata['feat'].float()
            if uses_edge_feats:
                logits = model(g, node_feats, g.edata['feat'].float())
            else:
                logits = model(g, node_feats)
            y_true.append(labels.detach().cpu())
            y_pred.append(logits.detach().cpu())

    y_true = torch.cat(y_true, dim=0).numpy()
    y_pred = torch.cat(y_pred, dim=0).numpy()

    return evaluator.eval({
        'y_true': y_true,
        'y_pred': y_pred
    })['rocauc']

In [None]:
def eval_epoch_statistical(model, device, data_loader, evaluator):
    """Collect predictions on ``data_loader`` and return the metrics of ``statistical``.

    The raw model outputs are used as scores (``y_pro``). Hard class
    predictions (``y_pred``) are derived from them as follows:

    * one output column (or a 1-D output): the logit is thresholded at 0,
      i.e. at a sigmoid probability of 0.5. Taking ``argmax`` over a single
      column would always yield class 0 and label every sample negative.
    * several output columns: ``argmax`` over the columns, as before.

    Args:
        model: network called as ``model(g, node_feats)`` or
            ``model(g, node_feats, edge_feats)`` depending on ``model_name``.
        device: device the batched graph is moved to.
        data_loader: iterable yielding ``(graph, labels)`` batches.
        evaluator: unused; kept so the signature matches ``eval_epoch``.

    Returns:
        ``(tn, fp, fn, tp, se, sp, acc, mcc, auc_prc, auc_roc)`` as returned
        by ``statistical``.

    Raises:
        ValueError: if ``model_name`` is not a supported model.
    """
    node_only_models = ("gcn", "gat", "nfp")
    node_edge_models = ("attfp", "pagt", "weave", "mpnn")
    if model_name not in node_only_models + node_edge_models:
        raise ValueError(f"unsupported model_name {model_name!r}")
    uses_edge_feats = model_name in node_edge_models

    model.eval()
    y_true, y_pro = [], []

    # Inference only: skip building the autograd graph
    with torch.no_grad():
        for g, labels in data_loader:
            g = g.to(device)
            node_feats = g.ndata['feat'].float()
            if uses_edge_feats:
                logits = model(g, node_feats, g.edata['feat'].float())
            else:
                logits = model(g, node_feats)
            y_true.append(labels.detach().cpu())
            y_pro.append(logits.detach().cpu())

    y_true = torch.cat(y_true, dim=0).numpy()
    y_pro = torch.cat(y_pro, dim=0).numpy()

    if y_pro.ndim == 1 or y_pro.shape[1] == 1:
        # Single logit per sample: positive iff sigmoid(logit) > 0.5 <=> logit > 0
        y_pred = (y_pro.reshape(-1) > 0).astype(int)
    else:
        y_pred = np.argmax(y_pro, axis=1)

    return statistical(y_true, y_pred, y_pro)

In [None]:
# metrics
def statistical(y_true, y_pred, y_pro):
    """Compute binary-classification metrics from labels, hard predictions and scores.

    Args:
        y_true: ground-truth labels, expected to be 0/1.
        y_pred: hard class predictions (0/1) used for the confusion matrix.
        y_pro: continuous scores (higher = more likely positive) used for the
            precision-recall and ROC curves.

    Returns:
        ``(tn, fp, fn, tp, se, sp, acc, mcc, auc_prc, auc_roc)`` where ``se``
        is sensitivity (recall of the positive class), ``sp`` specificity,
        ``acc`` accuracy, ``mcc`` the Matthews correlation coefficient and
        ``auc_prc`` / ``auc_roc`` the areas under the PR and ROC curves.
    """
    # Pin the label set so the matrix is always 2x2, even when the labels and
    # predictions happen to contain a single class only.
    c_mat = confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = c_mat.ravel()
    se = tp / (tp + fn)
    sp = tn / (tn + fp)
    acc = (tp + tn) / (tn + fp + fn + tp)
    # Multiply as floats so the product of four counts cannot overflow int64;
    # the small epsilon keeps the denominator non-zero.
    mcc_denominator = np.sqrt(
        float(tp + fp) * float(tp + fn) * float(tn + fp) * float(tn + fn) + 1e-8)
    mcc = (tp * tn - fp * fn) / mcc_denominator
    # One pass over the PR curve; auc() expects x (recall) first, then y (precision)
    precision, recall, _ = precision_recall_curve(y_true, y_pro, pos_label=1)
    auc_prc = auc(recall, precision)
    auc_roc = roc_auc_score(y_true, y_pro)
    return tn, fp, fn, tp, se, sp, acc, mcc, auc_prc, auc_roc

In [None]:
# hyperparams opt functions
def hyper_opt(hyper_space):
    """Hyperopt objective: train one candidate model and report its validation loss.

    The model class is chosen by the notebook-level ``model_name`` and is
    configured from ``hyper_space``, the concrete hyperparameter sample that
    hyperopt passes in. Training uses early stopping: it ends once the
    validation ROC-AUC has not improved for ``patience`` epochs, or after
    ``epochs`` epochs.

    Args:
        hyper_space: mapping from hyperparameter name to the sampled value.
            Must contain ``"lr"`` and ``"l2"`` plus the model-specific keys.

    Returns:
        ``{'loss': 1 - best validation ROC-AUC, 'status': STATUS_OK}``.
        The loss refers to the best epoch (the one early stopping keeps),
        not to whichever epoch happened to be the last one.

    Raises:
        ValueError: if ``model_name`` is not a supported model.
    """
    # get the model instance
    if model_name == "attfp":
        model = AttentiveFPPredictor(node_feat_size=node_feat_dim,
                                     edge_feat_size=edge_feat_dim,
                                     num_layers=hyper_space["num_layers"],
                                     num_timesteps=hyper_space["num_timesteps"],
                                     graph_feat_size=hyper_space["graph_feat_size"],
                                     dropout=hyper_space["dropout"])

    elif model_name == "gat":
        model = GATPredictor(in_feats=node_feat_dim,
                             hidden_feats=hyper_space["hidden_feats"],
                             num_heads=hyper_space["num_heads"],
                             predictor_hidden_feats=hyper_space["predictor_hidden_feats"],
                             predictor_dropout=hyper_space["predictor_dropout"])

    elif model_name == "gcn":
        model = GCNPredictor(in_feats=node_feat_dim,
                             hidden_feats=hyper_space["hidden_feats"],
                             dropout=hyper_space["dropout"],
                             predictor_hidden_feats=hyper_space["predictor_hidden_feats"],
                             predictor_dropout=hyper_space["predictor_dropout"])

    elif model_name == "mpnn":
        # The mpnn search space defines no "num_layer_set2set" entry, so only
        # forward it when a caller actually sampled one; otherwise the
        # predictor's own default applies instead of a KeyError.
        optional_kwargs = {}
        if "num_layer_set2set" in hyper_space:
            optional_kwargs["num_layer_set2set"] = hyper_space["num_layer_set2set"]
        model = MPNNPredictor(node_in_feats=node_feat_dim,
                              edge_in_feats=edge_feat_dim,
                              node_out_feats=hyper_space["node_out_feats"],
                              edge_hidden_feats=hyper_space["edge_hidden_feats"],
                              num_step_message_passing=hyper_space["num_step_message_passing"],
                              num_step_set2set=hyper_space["num_step_set2set"],
                              **optional_kwargs)

    elif model_name == "weave":
        model = WeavePredictor(node_in_feats=node_feat_dim,
                               edge_in_feats=edge_feat_dim,
                               gnn_hidden_feats=hyper_space["gnn_hidden_feats"],
                               graph_feats=hyper_space["graph_feats"],
                               num_gnn_layers=hyper_space["num_gnn_layers"])

    elif model_name == "nfp":
        model = NFPredictor(in_feats=node_feat_dim,
                            hidden_feats=hyper_space["hidden_feats"],
                            max_degree=hyper_space["max_degree"],
                            dropout=hyper_space["dropout"],
                            predictor_hidden_size=hyper_space["predictor_hidden_size"],
                            predictor_dropout=hyper_space["predictor_dropout"])

    elif model_name == "pagt":
        model = PAGTNPredictor(node_in_feats=node_feat_dim,
                               edge_feats=edge_feat_dim,
                               node_out_feats=hyper_space["node_out_feats"],
                               node_hid_feats=hyper_space["node_hid_feats"],
                               depth=hyper_space["depth"],
                               nheads=hyper_space["nheads"],
                               dropout=hyper_space["dropout"])

    else:
        raise ValueError(f"unsupported model_name {model_name!r}")

    # move the model first, then build the optimizer on its parameters
    model = model.to(device)

    # loss function (binary classification, positives re-weighted)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))

    # optimizer
    opt = optim.Adam(model.parameters(), lr=hyper_space["lr"], weight_decay=hyper_space["l2"])

    # early-stopping bookkeeping
    best_auc = 0
    increase_idx = 0  # epoch of the last validation improvement

    for j in range(epochs):
        train_epoch(model, device, train_loader, opt, loss_fn)

        # Only the validation score steers the search. The train and test
        # splits are deliberately not evaluated here: their results were
        # never used, and the test split must not influence model selection.
        valid_auc = eval_epoch(model, device, valid_loader, evaluator)

        if valid_auc > best_auc:
            increase_idx = j
            best_auc = valid_auc

        # stop once there was no improvement for `patience` epochs
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break

    return {'loss': 1 - best_auc, 'status': STATUS_OK}

In [None]:
"""# patience when no increase in X
# j is number of epoch
# we need another indicator of when the last update was made (the last increase)
epoks = 3
increase_idx = 0
patienc = 2

for j in range(epoks):
    print(f"j {j}")
    # checks if there was an update
    if float(np.random.rand(1,1)) > float(np.random.rand(1,1)):
        increase_idx = j
        # save model
    # checks if the last update is X (patience) ago
    if j - increase_idx == patienc:
        break
    print(f"increase_idx {increase_idx}")

# increase_idx = """

In [None]:
import numpy as np

In [None]:
# Scratch check: compare two uniform random draws. rand() without arguments
# already returns a Python float, which avoids calling float() on a (1, 1)
# array (deprecated since NumPy 1.25).
np.random.rand() > np.random.rand()

In [None]:
# weight of the positive class for the loss
def get_pos_weight(data):
    """Return the negative-to-positive ratio of ``data.labels``.

    The positives are counted by summing the labels along dimension 0 and the
    negatives are the remaining entries, so the result holds one ratio per
    label column.
    """
    labels = data.labels
    positives = F.sum(labels, dim=0)
    total = F.tensor(len(labels))
    negatives = total - positives
    return negatives / positives

# data

In [None]:
# Dataset, its official evaluator and the feature widths of a sample graph
dataset = DglGraphPropPredDataset(name="ogbg-molhiv")
evaluator = Evaluator(name="ogbg-molhiv")
g, _ = dataset[0]
node_feat_dim = g.ndata['feat'].shape[-1]
edge_feat_dim = g.edata['feat'].shape[-1]

batch_size = 128

# One loader per split of the dataset's own index split; only the training
# loader shuffles its samples.
split_idx = dataset.get_idx_split()
train_loader, valid_loader, test_loader = (
    DataLoader(dataset[split_idx[split]],
               batch_size=batch_size,
               shuffle=(split == "train"),
               collate_fn=collate_dgl)
    for split in ("train", "valid", "test")
)

In [None]:
# calculate the positive-class weight (negatives / positives) over the labels
# of the whole dataset; it is passed to BCEWithLogitsLoss as `pos_weight`
pos_weight = get_pos_weight(dataset)

# experimentation overview

In [None]:
# Show the table of experiments run so far, if one has been saved already
overview_df_filename = "experimentation_overview"
if file_exists(overview_df_filename):
    overview_df = pd.read_parquet(overview_df_filename)
    display(overview_df)
else:
    print("no experiments conducted yet, please run the models")

# GNNs

which GNNs to include?
- AttentiveFPPredictor
- GATPredictor
- GCNPredictor
- MPNNPredictor
- WeavePredictor
- NFPredictor
- PAGTNPredictor

In [None]:
# hyper space across models
#
# The candidate lists below are the single source of truth: the search space
# is built from them, and the option *index* that fmin reports for every
# hp.choice is decoded with the very same list after the optimization.
# (Previously the values were written out twice and had drifted apart:
# mpnn's num_step_set2set was searched over [2, 4, 6, 8] but decoded with
# [2, 3, 4].)

# attfp
attfp_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
attfp_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]  # default: 1e-3
attfp_num_layers_ls = [2, 3, 4, 5, 6]  # default: 2
attfp_num_timesteps_ls = [1, 2, 3, 4, 5]  # default: 2
attfp_graph_feat_size_ls = [50, 100, 200, 300]  # default: 200
attfp_dropout_ls = [0, 0.1, 0.3, 0.5]

# gat
gat_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
gat_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]
gat_hidden_feats_ls = [[64, 64], [128, 128], [256, 256], [128, 64], [256, 128]]
gat_num_heads_ls = [[2, 2], [3, 3], [4, 4], [4, 3], [3, 2]]
gat_predictor_hidden_feats_ls = [128, 64, 256]
gat_predictor_dropout_ls = [0, 0.1, 0.2, 0.3, 0.4]

# gcn
gcn_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
gcn_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]  # default: 1e-3
gcn_hidden_feats_ls = [[64, 64], [128, 128], [256, 256], [128, 64], [256, 128]]  # default: 2 GCN layers [64, 64]
gcn_dropout_ls = [[0, 0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]  # default: 0
gcn_predictor_hidden_feats_ls = [128, 64, 256]  # default: 128
gcn_predictor_dropout_ls = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

# mpnn
mpnn_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
mpnn_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]
mpnn_node_out_feats_ls = [128, 64, 32, 16]
mpnn_edge_hidden_feats_ls = [128, 64, 32, 16]
mpnn_num_step_message_passing_ls = [2, 4, 6, 8]
mpnn_num_step_set2set_ls = [2, 4, 6, 8]  # the values that are actually searched

# weave
weave_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
weave_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]
weave_gnn_hidden_feats_ls = [128, 64, 50, 32]
weave_num_gnn_layers_ls = [2, 3]
weave_graph_feats_ls = [128, 64, 50, 32]

# nfp
nfp_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
nfp_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]
nfp_hidden_feats_ls = [[64, 64], [128, 128], [256, 256], [128, 64], [256, 128]]
nfp_dropout_ls = [[0, 0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3], [0.4, 0.4], [0.5, 0.5]]
nfp_predictor_hidden_size_ls = [256, 128, 64, 32]
nfp_predictor_dropout_ls = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

# pagt
pagt_l2_ls = [0, 10 ** -8, 10 ** -6, 10 ** -4]
pagt_lr_ls = [10 ** -2.5, 10 ** -3.5, 10 ** -1.5]
pagt_node_hid_feats_ls = [32, 64, 128]
pagt_node_out_feats_ls = [128, 64, 32, 16]
pagt_depth_ls = [3, 5, 7]
pagt_nheads_ls = [1, 2, 3, 4]
pagt_dropout_ls = [0, 0.1, 0.2, 0.3, 0.4, 0.5]


def _choice_space(**candidates):
    """Map every keyword ``name=values`` to ``name -> hp.choice(name, values)``."""
    return {name: hp.choice(name, values) for name, values in candidates.items()}


# search space per model, keyed by the `model_name` used throughout the notebook
hspace_gnns = {
    'attfp': _choice_space(l2=attfp_l2_ls,
                           lr=attfp_lr_ls,
                           num_layers=attfp_num_layers_ls,
                           num_timesteps=attfp_num_timesteps_ls,
                           graph_feat_size=attfp_graph_feat_size_ls,
                           dropout=attfp_dropout_ls),

    'gat': _choice_space(l2=gat_l2_ls,
                         lr=gat_lr_ls,
                         hidden_feats=gat_hidden_feats_ls,
                         num_heads=gat_num_heads_ls,
                         predictor_hidden_feats=gat_predictor_hidden_feats_ls,
                         predictor_dropout=gat_predictor_dropout_ls),

    'gcn': _choice_space(l2=gcn_l2_ls,
                         lr=gcn_lr_ls,
                         hidden_feats=gcn_hidden_feats_ls,
                         dropout=gcn_dropout_ls,
                         predictor_hidden_feats=gcn_predictor_hidden_feats_ls,
                         predictor_dropout=gcn_predictor_dropout_ls),

    'mpnn': _choice_space(l2=mpnn_l2_ls,
                          lr=mpnn_lr_ls,
                          node_out_feats=mpnn_node_out_feats_ls,
                          edge_hidden_feats=mpnn_edge_hidden_feats_ls,
                          num_step_message_passing=mpnn_num_step_message_passing_ls,
                          num_step_set2set=mpnn_num_step_set2set_ls),

    'weave': _choice_space(l2=weave_l2_ls,
                           lr=weave_lr_ls,
                           gnn_hidden_feats=weave_gnn_hidden_feats_ls,
                           num_gnn_layers=weave_num_gnn_layers_ls,
                           graph_feats=weave_graph_feats_ls),

    'nfp': _choice_space(l2=nfp_l2_ls,
                         lr=nfp_lr_ls,
                         hidden_feats=nfp_hidden_feats_ls,
                         dropout=nfp_dropout_ls,
                         predictor_hidden_size=nfp_predictor_hidden_size_ls,
                         predictor_dropout=nfp_predictor_dropout_ls),

    'pagt': _choice_space(l2=pagt_l2_ls,
                          lr=pagt_lr_ls,
                          node_hid_feats=pagt_node_hid_feats_ls,
                          node_out_feats=pagt_node_out_feats_ls,
                          depth=pagt_depth_ls,
                          nheads=pagt_nheads_ls,
                          dropout=pagt_dropout_ls),
}

In [None]:
# hyperparameter optimization parameters
# upper bound on the number of training epochs per model
epochs = 200
# early stopping: epochs without validation ROC-AUC improvement before training ends
patience = 50
# number of hyperparameter samples evaluated by fmin (its max_evals)
OPT_ITERS = 30
# number of trainings (each with its own seed) used to report performance
repetitions = 5 

## GCN

In [None]:
# gcn setup
model_name = "gcn" 
hyper_space = hspace_gnns[model_name]
filename_gcn = "gcn_opt"

if file_exists(filename_gcn+".sav") and file_exists(filename_gcn+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best GCN model is:")
    loaded_model = pickle.load(open(filename_gcn+".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_gcn + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_gcn = fmin(hyper_opt, hyper_space, algo=tpe.suggest, max_evals=OPT_ITERS, trials=trials)
    text = (
            "the best GNN hyperparameters are: "
            f"learning rate {gcn_lr_ls[best_results_gcn['lr']]} | "
            f"L2 regularization {gcn_l2_ls[best_results_gcn['l2']]} | "
            f"hidden_feats {gcn_hidden_feats_ls[best_results_gcn['hidden_feats']]} | "
            f"dropout {gcn_dropout_ls[best_results_gcn['dropout']]} | "
            f"predictor_hidden_feats {gcn_predictor_hidden_feats_ls[best_results_gcn['predictor_hidden_feats']]} | "
            f"predictor_dropout {gcn_predictor_dropout_ls[best_results_gcn['predictor_dropout']]}"
            )
    print(text)
    
    # reconstruct best model
    model_hp_opt = GCNPredictor(in_feats = node_feat_dim,
                              hidden_feats = gcn_hidden_feats_ls[best_results_gcn['hidden_feats']],
                              dropout = gcn_dropout_ls[best_results_gcn['dropout']],
                              predictor_hidden_feats = gcn_predictor_hidden_feats_ls[best_results_gcn['predictor_hidden_feats']],
                              predictor_dropout = gcn_predictor_dropout_ls[best_results_gcn['predictor_dropout']])
    
    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=gcn_lr_ls[best_results_gcn["lr"]], weight_decay=gcn_l2_ls[best_results_gcn["l2"]])

    # reset random seed
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 
    
    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    # stopper
    model_hp_opt.to(device)

    #  early stopping
    increase_idx = 0
    best_auc = 0
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update valid_aicj
            best_auc = valid_auc
            # save model
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # check how long there was no improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters
    with open(filename_gcn+"_hps", 'wb') as f:
        pickle.dump(best_results_gcn, f)
    # loadable via ...
    # with open(filename_gcn+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    pickle.dump(best_model, open(filename_gcn+".sav", 'wb'))
    # loadable via ...
    # best_model = pickle.load(open(filename_gcn+".sav", 'rb'))

    # repetitions for performance on different seeds
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = [], [], [], [], [], [], [], [], [], []
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = [], [], [], [], [], [], [], [], [], []
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = [], [], [], [], [], [], [], [], [], []

    tr_lst = [tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc]
    va_lst = [va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc]
    te_lst = [te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]

    # repetitions
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
        
        model_hp_opt = GCNPredictor(in_feats = node_feat_dim,
                                hidden_feats = gcn_hidden_feats_ls[best_results_gcn['hidden_feats']],
                                dropout = gcn_dropout_ls[best_results_gcn['dropout']],
                                predictor_hidden_feats = gcn_predictor_hidden_feats_ls[best_results_gcn['predictor_hidden_feats']],
                                predictor_dropout = gcn_predictor_dropout_ls[best_results_gcn['predictor_dropout']])
        
        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=gcn_lr_ls[best_results_gcn["lr"]], weight_decay=gcn_l2_ls[best_results_gcn["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        # stopper
        model_hp_opt.to(device)

        #  early stopping
        increase_idx = 0
        best_auc = 0
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update valid_aicj
                best_auc = valid_auc
                # save model
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # check how long there was no improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # creating dataframe
        for j in range(len(tr_lst)):               
            tr_lst[j].append(tr_metrics[j])
            va_lst[j].append(va_metrics[j])
            te_lst[j].append(te_metrics[j])

    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    gcn_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    gcn_perf.to_parquet(filename_gcn + "_performance", index=0)      
    # loadable via ...
    # gcn_perf = pd.read_parquet(filename_gcn + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr = []
    va_aggr = []
    te_aggr = []
    results = [tr_aggr, va_aggr, te_aggr]

    for i in range(len(results)):
        # avg_auc_roc
        results[i].append(round(np.average(gcn_perf["auc_roc"][i]), 5))
        # std_auc_roc
        results[i].append(round(np.std(gcn_perf["auc_roc"][i]), 5))
        # top_roc_auc
        results[i].append(round(np.max(gcn_perf["auc_roc"][i]), 5))

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "GCN"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_gcn
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": gcn_lr_ls[best_results_gcn['lr']],
                       "L2 regularization": gcn_l2_ls[best_results_gcn['l2']],
                       "hidden_feats": gcn_hidden_feats_ls[best_results_gcn['hidden_feats']],
                       "dropout": gcn_dropout_ls[best_results_gcn['dropout']],
                       "predictor_hidden_feats": gcn_predictor_hidden_feats_ls[best_results_gcn['predictor_hidden_feats']],
                       "predictor_dropout": gcn_predictor_dropout_ls[best_results_gcn['predictor_dropout']],
                       "other": "default"
                       }

    tr_performance = tr_aggr
    va_performance = va_aggr
    te_performance = te_aggr

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        info = [[model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]]
        df = pd.DataFrame(info, columns=cls)
        # save
        print("saving information")
        df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model
        new_row = {}
        keys = cls
        values = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]
        for key in keys:
            for value in values:
                new_row[key] = value
                values.remove(value)
                break 
        overview_df = overview_df.append(new_row, ignore_index=True)
        # save
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)
        overview_df.to_parquet(overview_df_filename, index=0)        
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())

performing training and optimization
starting hyperparameter optimization
early stopping at epoch 142 with patience set to 50
early stopping at epoch 204 with patience set to 50
early stopping at epoch 87 with patience set to 50
early stopping at epoch 97 with patience set to 50
early stopping at epoch 103 with patience set to 50
early stopping at epoch 118 with patience set to 50
early stopping at epoch 176 with patience set to 50
early stopping at epoch 112 with patience set to 50
early stopping at epoch 91 with patience set to 50
early stopping at epoch 95 with patience set to 50
early stopping at epoch 98 with patience set to 50
early stopping at epoch 95 with patience set to 50
early stopping at epoch 160 with patience set to 50
early stopping at epoch 149 with patience set to 50
early stopping at epoch 153 with patience set to 50
early stopping at epoch 116 with patience set to 50
early stopping at epoch 98 with patience set to 50
early stopping at epoch 56 with patience set to 5

ArrowTypeError: ignored

## AttentiveFP

In [None]:
# attfp setup
model_name = "attfp" 
hyper_space = hspace_gnns[model_name]
filename_attfp = "attfp_opt"

if file_exists(filename_attfp+".sav") and file_exists(filename_attfp+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best AttentiveFP model is:")
    loaded_model = pickle.load(open(filename_attfp+".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_attfp + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_attfp = fmin(hyper_opt, hyper_space, algo=tpe.suggest, max_evals=OPT_ITERS, trials=trials)
    text = (
            "the best GNN hyperparameters are: "
            f"learning rate {attfp_lr_ls[best_results_attfp['lr']]} | "
            f"L2 regularization {attfp_l2_ls[best_results_attfp['l2']]} | "
            f"num_layers {attfp_num_layers_ls[best_results_attfp['num_layers']]} | "
            f"num_timesteps {attfp_num_timesteps_ls[best_results_attfp['num_timesteps']]} | "
            f"graph_feat_size {attfp_graph_feat_size_ls[best_results_attfp['graph_feat_size']]} | "
            f"dropout {attfp_dropout_ls[best_results_attfp['dropout']]}"
            )
    print(text)
    
    # reconstruct best model
    model_hp_opt = AttentiveFPPredictor(node_feat_size = node_feat_dim,
                                        edge_feat_size = edge_feat_dim, 
                                        num_layers = attfp_num_layers_ls[best_results_attfp['num_layers']], 
                                        num_timesteps = attfp_num_timesteps_ls[best_results_attfp['num_timesteps']],
                                        graph_feat_size = attfp_graph_feat_size_ls[best_results_attfp['graph_feat_size']], 
                                        dropout = attfp_dropout_ls[best_results_attfp['dropout']])
    
    
    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=attfp_lr_ls[best_results_attfp["lr"]], weight_decay=attfp_l2_ls[best_results_attfp["l2"]])

    # reset random seed
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 
    
    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    # stopper
    model_hp_opt.to(device)

    #  early stopping
    increase_idx = 0
    best_auc = 0
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update valid_aicj
            best_auc = valid_auc
            # save model
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # check how long there was no improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters
    with open(filename_attfp+"_hps", 'wb') as f:
        pickle.dump(best_results_attfp, f)
    # loadable via ...
    # with open(filename_attfp+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    pickle.dump(best_model, open(filename_attfp+".sav", 'wb'))
    # loadable via ...
    # best_model = pickle.load(open(filename_attfp+".sav", 'rb'))

    # repetitions for performance on different seeds
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = [], [], [], [], [], [], [], [], [], []
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = [], [], [], [], [], [], [], [], [], []
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = [], [], [], [], [], [], [], [], [], []

    tr_lst = [tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc]
    va_lst = [va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc]
    te_lst = [te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]

    # repetitions
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
        
        # reconstruct best model
        model_hp_opt = AttentiveFPPredictor(node_feat_size = node_feat_dim,
                                            edge_feat_size = edge_feat_dim, 
                                            num_layers = attfp_num_layers_ls[best_results_attfp['num_layers']], 
                                            num_timesteps = attfp_num_timesteps_ls[best_results_attfp['num_timesteps']],
                                            graph_feat_size = attfp_graph_feat_size_ls[best_results_attfp['graph_feat_size']], 
                                            dropout = attfp_dropout_ls[best_results_attfp['dropout']])
        
        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=attfp_lr_ls[best_results_attfp["lr"]], weight_decay=attfp_l2_ls[best_results_attfp["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        # stopper
        model_hp_opt.to(device)

        #  early stopping
        increase_idx = 0
        best_auc = 0
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update valid_aicj
                best_auc = valid_auc
                # save model
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # check how long there was no improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # creating dataframe
        for j in range(len(tr_lst)):               
            tr_lst[j].append(tr_metrics[j])
            va_lst[j].append(va_metrics[j])
            te_lst[j].append(te_metrics[j])

    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    attfp_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    attfp_perf.to_parquet(filename_attfp + "_performance", index=0)      
    # loadable via ...
    # attfp_perf = pd.read_parquet(filename_attfp + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr = []
    va_aggr = []
    te_aggr = []
    results = [tr_aggr, va_aggr, te_aggr]

    for i in range(len(results)):
        # avg_auc_roc
        results[i].append(round(np.average(attfp_perf["auc_roc"][i]), 5))
        # std_auc_roc
        results[i].append(round(np.std(attfp_perf["auc_roc"][i]), 5))
        # top_roc_auc
        results[i].append(round(np.max(attfp_perf["auc_roc"][i]), 5))

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "AttentiveFP"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_attfp
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": attfp_lr_ls[best_results_attfp['lr']],
                       "L2 regularization": attfp_l2_ls[best_results_attfp['l2']],
                       "num_layers": attfp_num_layers_ls[best_results_attfp['num_layers']],
                       "num_timesteps": attfp_num_timesteps_ls[best_results_attfp['num_timesteps']],
                       "graph_feat_size": attfp_graph_feat_size_ls[best_results_attfp['graph_feat_size']],
                       "dropout": attfp_dropout_ls[best_results_attfp['dropout']],
                       "other": "default"
                       }

    tr_performance = tr_aggr
    va_performance = va_aggr
    te_performance = te_aggr

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        info = [[model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]]
        df = pd.DataFrame(info, columns=cls)
        # save
        print("saving information")
        df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model
        new_row = {}
        keys = cls
        values = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]
        for key in keys:
            for value in values:
                new_row[key] = value
                values.remove(value)
                break 
        overview_df = overview_df.append(new_row, ignore_index=True)
        # save
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)        
        overview_df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())

performing training and optimization
starting hyperparameter optimization
early stopping at epoch 81 with patience set to 50
early stopping at epoch 84 with patience set to 50
early stopping at epoch 54 with patience set to 50
early stopping at epoch 120 with patience set to 50
early stopping at epoch 127 with patience set to 50
early stopping at epoch 102 with patience set to 50
early stopping at epoch 53 with patience set to 50
early stopping at epoch 86 with patience set to 50
early stopping at epoch 94 with patience set to 50
early stopping at epoch 50 with patience set to 50
early stopping at epoch 51 with patience set to 50
early stopping at epoch 102 with patience set to 50
early stopping at epoch 112 with patience set to 50
early stopping at epoch 53 with patience set to 50
early stopping at epoch 73 with patience set to 50
early stopping at epoch 132 with patience set to 50
early stopping at epoch 72 with patience set to 50
early stopping at epoch 70 with patience set to 50
ea

## GAT

In [None]:
# gat setup
model_name = "gat" 
hyper_space = hspace_gnns[model_name]
filename_gat = "gat_opt"

if file_exists(filename_gat+".sav") and file_exists(filename_gat+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best gat model is:")
    loaded_model = pickle.load(open(filename_gat+".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_gat + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_gat = fmin(hyper_opt, hyper_space, algo=tpe.suggest, max_evals=OPT_ITERS, trials=trials)
    text = (
            "the best GNN hyperparameters are: "
            f"learning rate {gat_lr_ls[best_results_gat['lr']]} | "
            f"L2 regularization {gat_l2_ls[best_results_gat['l2']]} | "
            f"hidden_feats {gat_hidden_feats_ls[best_results_gat['hidden_feats']]} | "
            f"num_heads {gat_num_heads_ls[best_results_gat['num_heads']]} | "
            f"predictor_hidden_feats {gat_predictor_hidden_feats_ls[best_results_gat['predictor_hidden_feats']]} | "
            f"predictor_dropout {gat_predictor_dropout_ls[best_results_gat['predictor_dropout']]}"
            )
    print(text)
    
    # reconstruct best model
    model_hp_opt =GATPredictor(in_feats  = node_feat_dim,
                               hidden_feats = gat_hidden_feats_ls[best_results_gat['hidden_feats']],
                               num_heads = gat_num_heads_ls[best_results_gat['num_heads']],
                               predictor_hidden_feats = gat_predictor_hidden_feats_ls[best_results_gat['predictor_hidden_feats']], 
                               predictor_dropout= gat_predictor_dropout_ls[best_results_gat['predictor_dropout']])
    
    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=gat_lr_ls[best_results_gat["lr"]], weight_decay=gat_l2_ls[best_results_gat["l2"]])

    # reset random seed
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 
    
    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    # stopper
    model_hp_opt.to(device)

    #  early stopping
    increase_idx = 0
    best_auc = 0
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update valid_aicj
            best_auc = valid_auc
            # save model
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # check how long there was no improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters
    with open(filename_gat+"_hps", 'wb') as f:
        pickle.dump(best_results_gat, f)
    # loadable via .at
    # with open(filename_gat+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    pickle.dump(best_model, open(filename_gat+".sav", 'wb'))
    # loadable via ...
    # best_model = pickle.load(open(filename_gat+".sav", 'rb'))

    # repetitions for performance on different seeds
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = [], [], [], [], [], [], [], [], [], []
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = [], [], [], [], [], [], [], [], [], []
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = [], [], [], [], [], [], [], [], [], []

    tr_lst = [tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc]
    va_lst = [va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc]
    te_lst = [te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]

    # repetitions
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
        
        # reconstruct best model
        model_hp_opt =GATPredictor(in_feats  = node_feat_dim,
                                hidden_feats = gat_hidden_feats_ls[best_results_gat['hidden_feats']],
                                num_heads = gat_num_heads_ls[best_results_gat['num_heads']],
                                predictor_hidden_feats = gat_predictor_hidden_feats_ls[best_results_gat['predictor_hidden_feats']], 
                                predictor_dropout= gat_predictor_dropout_ls[best_results_gat['predictor_dropout']]) 
               
        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=gat_lr_ls[best_results_gat["lr"]], weight_decay=gat_l2_ls[best_results_gat["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        # stopper
        model_hp_opt.to(device)

        #  early stopping
        increase_idx = 0
        best_auc = 0
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update valid_aicj
                best_auc = valid_auc
                # save model
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # check how long there was no improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # creating dataframe
        for j in range(len(tr_lst)):               
            tr_lst[j].append(tr_metrics[j])
            va_lst[j].append(va_metrics[j])
            te_lst[j].append(te_metrics[j])

    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    gat_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    gat_perf.to_parquet(filename_gat + "_performance", index=0)      
    # loadable via ...
    # gat_perf = pd.read_parquet(filename_gat + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr = []
    va_aggr = []
    te_aggr = []
    results = [tr_aggr, va_aggr, te_aggr]

    for i in range(len(results)):
        # avg_auc_roc
        results[i].append(round(np.average(gat_perf["auc_roc"][i]), 5))
        # std_auc_roc
        results[i].append(round(np.std(gat_perf["auc_roc"][i]), 5))
        # top_roc_auc
        results[i].append(round(np.max(gat_perf["auc_roc"][i]), 5))

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "GAT"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_gat
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": gat_lr_ls[best_results_gat['lr']],
                       "L2 regularization": gat_l2_ls[best_results_gat['l2']],
                       "hidden_feats": gat_hidden_feats_ls[best_results_gat['hidden_feats']],
                       "num_heads": gat_num_heads_ls[best_results_gat['num_heads']],
                       "predictor_hidden_feats": gat_predictor_hidden_feats_ls[best_results_gat['predictor_hidden_feats']],
                       "predictor_dropout": gat_predictor_dropout_ls[best_results_gat['predictor_dropout']],
                       "other": "default"
                       }


    tr_performance = tr_aggr
    va_performance = va_aggr
    te_performance = te_aggr

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        info = [[model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]]
        df = pd.DataFrame(info, columns=cls)
        # save
        print("saving information")
        df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model
        new_row = {}
        keys = cls
        values = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]
        for key in keys:
            for value in values:
                new_row[key] = value
                values.remove(value)
                break 
        overview_df = overview_df.append(new_row, ignore_index=True)
        # save
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)        
        overview_df.to_parquet(overview_df_filename, index=0)        
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())

performing training and optimization
starting hyperparameter optimization
early stopping at epoch 66 with patience set to 50
early stopping at epoch 115 with patience set to 50
early stopping at epoch 74 with patience set to 50
early stopping at epoch 70 with patience set to 50
early stopping at epoch 73 with patience set to 50
early stopping at epoch 95 with patience set to 50
early stopping at epoch 122 with patience set to 50
early stopping at epoch 97 with patience set to 50
early stopping at epoch 183 with patience set to 50
early stopping at epoch 103 with patience set to 50
early stopping at epoch 69 with patience set to 50
early stopping at epoch 139 with patience set to 50
early stopping at epoch 111 with patience set to 50
early stopping at epoch 143 with patience set to 50
early stopping at epoch 79 with patience set to 50
early stopping at epoch 80 with patience set to 50
early stopping at epoch 92 with patience set to 50
early stopping at epoch 141 with patience set to 50


_______________________________________________________

## MPNN

In [None]:
# mpnn setup
model_name = "mpnn"
hyper_space = hspace_gnns[model_name]
filename_mpnn = "mpnn_opt"

if file_exists(filename_mpnn+".sav") and file_exists(filename_mpnn+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best MPNN model is:")
    loaded_model = pickle.load(open(filename_mpnn +".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_mpnn + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_mpnn = fmin(hyper_opt, hyper_space, algo=tpe.suggest, max_evals=OPT_ITERS, trials=trials)
    text = (
            "the best GNN hyperparameters are: "
            f"learning rate {mpnn_lr_ls[best_results_mpnn['lr']]} | "
            f"L2 regularization {mpnn_l2_ls[best_results_mpnn['l2']]} | "
            f"node_out_feats {mpnn_node_out_feats_ls[best_results_mpnn['node_out_feats']]} | "
            f"edge_hidden_feats {mpnn_edge_hidden_feats_ls[best_results_mpnn['edge_hidden_feats']]} | "
            f"num_step_message_passing {mpnn_num_step_message_passing_ls[best_results_mpnn['num_step_message_passing']]} | "
            f"num_step_set2set {mpnn_num_step_set2set_ls[best_results_mpnn['num_step_set2set']]}"
            )

    print(text)
    
    # reconstruct best model
    model_hp_opt = MPNNPredictor(node_in_feats = node_feat_dim ,
                                 edge_in_feats = edge_feat_dim,
                                 node_out_feats = mpnn_node_out_feats_ls[best_results_mpnn['node_out_feats']],
                                 edge_hidden_feats = mpnn_edge_hidden_feats_ls[best_results_mpnn['edge_hidden_feats']],
                                 num_step_message_passing  = mpnn_num_step_message_passing_ls[best_results_mpnn['num_step_message_passing']],
                                 num_step_set2set = mpnn_num_step_set2set_ls[best_results_mpnn['num_step_set2set']]
                                 )
    
    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=mpnn_lr_ls[best_results_mpnn["lr"]], weight_decay=mpnn_l2_ls[best_results_mpnn["l2"]])

    # reset random seed
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 
    
    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    # stopper
    model_hp_opt.to(device)

    #  early stopping
    increase_idx = 0
    best_auc = 0
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update valid_aicj
            best_auc = valid_auc
            # save model
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # check how long there was no improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters
    with open(filename_mpnn+"_hps", 'wb') as f:
        pickle.dump(best_results_mpnn, f)
    # loadable via ...
    # with open(filename_mpnn+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    pickle.dump(best_model, open(filename_mpnn+".sav", 'wb'))
    # loadable via ...
    # best_model = pickle.load(open(filename_mpnn+".sav", 'rb'))

    # repetitions for performance on different seeds
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = [], [], [], [], [], [], [], [], [], []
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = [], [], [], [], [], [], [], [], [], []
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = [], [], [], [], [], [], [], [], [], []

    tr_lst = [tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc]
    va_lst = [va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc]
    te_lst = [te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]

    # repetitions
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
        
        # reconstruct best model
        model_hp_opt = MPNNPredictor(node_in_feats = node_feat_dim ,
                                    edge_in_feats = edge_feat_dim,
                                    node_out_feats = mpnn_node_out_feats_ls[best_results_mpnn['node_out_feats']],
                                    edge_hidden_feats = mpnn_edge_hidden_feats_ls[best_results_mpnn['edge_hidden_feats']],
                                    num_step_message_passing  = mpnn_num_step_message_passing_ls[best_results_mpnn['num_step_message_passing']],
                                    num_step_set2set = mpnn_num_step_set2set_ls[best_results_mpnn['num_step_set2set']]
                                    )      
          
        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=mpnn_lr_ls[best_results_mpnn["lr"]], weight_decay=mpnn_l2_ls[best_results_mpnn["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        # stopper
        model_hp_opt.to(device)

        #  early stopping
        increase_idx = 0
        best_auc = 0
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update valid_aicj
                best_auc = valid_auc
                # save model
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # check how long there was no improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # creating dataframe
        for j in range(len(tr_lst)):               
            tr_lst[j].append(tr_metrics[j])
            va_lst[j].append(va_metrics[j])
            te_lst[j].append(te_metrics[j])

    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    mpnn_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    mpnn_perf.to_parquet(filename_mpnn + "_performance", index=0)      
    # loadable via ...
    # mpnn_perf = pd.read_parquet(filename_mpnn + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr = []
    va_aggr = []
    te_aggr = []
    results = [tr_aggr, va_aggr, te_aggr]

    for i in range(len(results)):
        # avg_auc_roc
        results[i].append(round(np.average(mpnn_perf["auc_roc"][i]), 5))
        # std_auc_roc
        results[i].append(round(np.std(mpnn_perf["auc_roc"][i]), 5))
        # top_roc_auc
        results[i].append(round(np.max(mpnn_perf["auc_roc"][i]), 5))

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "MPNN"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_mpnn
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": mpnn_lr_ls[best_results_mpnn['lr']],
                       "L2 regularization": mpnn_l2_ls[best_results_mpnn['l2']],
                       "node_out_feats": mpnn_node_out_feats_ls[best_results_mpnn['node_out_feats']],
                       "edge_hidden_feats": mpnn_edge_hidden_feats_ls[best_results_mpnn['edge_hidden_feats']],
                       "num_step_message_passing": mpnn_num_step_message_passing_ls[best_results_mpnn['num_step_message_passing']],
                       "num_step_set2set": mpnn_num_step_set2set_ls[best_results_mpnn['num_step_set2set']],
                       "other": "default"
                       }

    tr_performance = tr_aggr
    va_performance = va_aggr
    te_performance = te_aggr

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        info = [[model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]]
        df = pd.DataFrame(info, columns=cls)
        # save
        print("saving information")
        df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model
        new_row = {}
        keys = cls
        values = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]
        for key in keys:
            for value in values:
                new_row[key] = value
                values.remove(value)
                break 
        overview_df = overview_df.append(new_row, ignore_index=True)
        # save
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)        
        overview_df.to_parquet(overview_df_filename, index=0)        
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())

performing training and optimization
starting hyperparameter optimization
early stopping at epoch 116 with patience set to 50
  0%|          | 0/30 [26:59<?, ?it/s, best loss: ?]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 55 with patience set to 50
  3%|▎         | 1/30 [35:40<13:03:19, 1620.67s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 60 with patience set to 50
  7%|▋         | 2/30 [2:16:52<7:34:19, 973.55s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 10%|█         | 3/30 [3:06:19<24:46:26, 3303.20s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 13%|█▎        | 4/30 [3:19:56<22:53:32, 3169.71s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 55 with patience set to 50
 17%|█▋        | 5/30 [3:38:10<16:07:00, 2320.83s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 72 with patience set to 50
 20%|██        | 6/30 [3:55:40<12:41:28, 1903.68s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 173 with patience set to 50
 23%|██▎       | 7/30 [4:33:22<10:22:46, 1624.64s/it, best loss: 0.33743018812463255]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 72 with patience set to 50
 27%|██▋       | 8/30 [4:53:10<11:10:02, 1827.39s/it, best loss: 0.2304128698804624]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 30%|███       | 9/30 [5:04:42<9:29:41, 1627.69s/it, best loss: 0.2304128698804624]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 125 with patience set to 50
 33%|███▎      | 10/30 [5:28:40<7:26:15, 1338.77s/it, best loss: 0.2304128698804624]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 114 with patience set to 50
 37%|███▋      | 11/30 [6:29:12<7:13:32, 1369.09s/it, best loss: 0.22634969625710377]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 125 with patience set to 50
 40%|████      | 12/30 [6:52:18<10:17:18, 2057.69s/it, best loss: 0.22634969625710377]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 43%|████▎     | 13/30 [7:22:48<8:45:16, 1853.89s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



 47%|████▋     | 14/30 [7:22:49<8:12:32, 1847.00s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 69 with patience set to 50
 50%|█████     | 15/30 [8:12:58<8:11:27, 1965.85s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 156 with patience set to 50
 53%|█████▎    | 16/30 [8:59:28<6:14:34, 1605.32s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 57%|█████▋    | 17/30 [9:10:39<7:04:58, 1961.44s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 54 with patience set to 50
 60%|██████    | 18/30 [9:24:53<5:14:44, 1573.71s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 50 with patience set to 50
 63%|██████▎   | 19/30 [9:45:22<4:08:53, 1357.63s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 100 with patience set to 50
 67%|██████▋   | 20/30 [10:41:34<3:39:51, 1319.19s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 159 with patience set to 50
 70%|███████   | 21/30 [11:11:04<4:50:17, 1935.23s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 128 with patience set to 50
 73%|███████▎  | 22/30 [11:35:44<4:11:25, 1885.65s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 114 with patience set to 50
 77%|███████▋  | 23/30 [12:39:45<3:25:47, 1763.91s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 152 with patience set to 50
 80%|████████  | 24/30 [13:09:17<3:58:43, 2387.25s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 178 with patience set to 50
 83%|████████▎ | 25/30 [13:30:11<3:03:31, 2202.37s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



 87%|████████▋ | 26/30 [13:30:11<2:07:50, 1917.69s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



 90%|█████████ | 27/30 [14:07:13<1:40:27, 2009.01s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 52 with patience set to 50
 93%|█████████▎| 28/30 [14:57:52<1:09:58, 2099.23s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



early stopping at epoch 96 with patience set to 50
 97%|█████████▋| 29/30 [15:15:46<28:08, 1688.39s/it, best loss: 0.20944481187536756]

  self.dropout, self.training, self.bidirectional, self.batch_first)

  self.dropout, self.training, self.bidirectional, self.batch_first)



100%|██████████| 30/30 [15:15:47<00:00, 1831.57s/it, best loss: 0.20944481187536756]
the best GNN hyperparameters are: learning rate 0.0031622776601683794 | L2 regularization 0.0001 | node_out_feats 16 | edge_hidden_feats 16 | num_step_message_passing 2 | num_step_set2set 4
reconstructing and training model on best hyperparameters
performing repetitions on different seeds
early stopping at epoch 94 with patience set to 50


  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)


early stopping at epoch 95 with patience set to 50


  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)


early stopping at epoch 161 with patience set to 50


  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)


early stopping at epoch 194 with patience set to 50


  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)
  self.dropout, self.training, self.bidirectional, self.batch_first)


model information added to experimentation overview
|    | model_type   | filename                   | GPU_accelerator      | RAM      | data_features                                      | hyperparameters                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     | train_performance_ROC-AUC_avg/std/max   | valid_performance_ROC-AUC_avg/std/max   | test_performance_ROC-AUC_avg/std/max   |
|---:|:-------------|:---------------------------|:---------------------|:---------|:---------------------------------------------------|:-------------

## Weave

In [None]:
# weave setup
model_name = "weave" 
hyper_space = hspace_gnns[model_name]
filename_weave = "weave_opt"

if file_exists(filename_weave+".sav") and file_exists(filename_weave+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best Weave model is:")
    loaded_model = pickle.load(open(filename_weave+".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_weave + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_weave = fmin(hyper_opt, hyper_space, algo=tpe.suggest, max_evals=OPT_ITERS, trials=trials)
    text = (
            "the best GNN hyperparameters are: "
            f"learning rate {weave_lr_ls[best_results_weave['lr']]} | "
            f"L2 regularization {weave_l2_ls[best_results_weave['l2']]} | "
            f"gnn_hidden_feats {weave_gnn_hidden_feats_ls[best_results_weave['gnn_hidden_feats']]} | "
            f"num_gnn_layers {weave_num_gnn_layers_ls[best_results_weave['num_gnn_layers']]} | "
            f"graph_feats {weave_graph_feats_ls[best_results_weave['graph_feats']]}"
            )
    
    print(text)
    
    # reconstruct best model
    model_hp_opt = WeavePredictor(node_in_feats  = node_feat_dim,
                                  edge_in_feats =  edge_feat_dim,
                                  num_gnn_layers = weave_num_gnn_layers_ls[best_results_weave['num_gnn_layers']],
                                  gnn_hidden_feats = weave_gnn_hidden_feats_ls[best_results_weave['gnn_hidden_feats']],
                                  graph_feats = weave_graph_feats_ls[best_results_weave['graph_feats']])  
      
    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=weave_lr_ls[best_results_weave["lr"]], weight_decay=weave_l2_ls[best_results_weave["l2"]])

    # reset random seed
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 
    
    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    # stopper
    model_hp_opt.to(device)

    #  early stopping
    increase_idx = 0
    best_auc = 0
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update valid_aicj
            best_auc = valid_auc
            # save model
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # check how long there was no improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters
    with open(filename_weave+"_hps", 'wb') as f:
        pickle.dump(best_results_weave, f)
    # loadable via ...
    # with open(filename_weave+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    pickle.dump(best_model, open(filename_weave+".sav", 'wb'))
    # loadable via ...
    # best_model = pickle.load(open(filename_weave+".sav", 'rb'))

    # repetitions for performance on different seeds
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = [], [], [], [], [], [], [], [], [], []
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = [], [], [], [], [], [], [], [], [], []
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = [], [], [], [], [], [], [], [], [], []

    tr_lst = [tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc]
    va_lst = [va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc]
    te_lst = [te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]

    # repetitions
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
        
        model_hp_opt = WeavePredictor(node_in_feats  = node_feat_dim,
                        edge_in_feats =  edge_feat_dim,
                        num_gnn_layers = weave_num_gnn_layers_ls[best_results_weave['num_gnn_layers']],
                        gnn_hidden_feats = weave_gnn_hidden_feats_ls[best_results_weave['gnn_hidden_feats']],
                        graph_feats = weave_graph_feats_ls[best_results_weave['graph_feats']])  
        
        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=weave_lr_ls[best_results_weave["lr"]], weight_decay=weave_l2_ls[best_results_weave["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        # stopper
        model_hp_opt.to(device)

        #  early stopping
        increase_idx = 0
        best_auc = 0
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update valid_aicj
                best_auc = valid_auc
                # save model
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # check how long there was no improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # creating dataframe
        for j in range(len(tr_lst)):               
            tr_lst[j].append(tr_metrics[j])
            va_lst[j].append(va_metrics[j])
            te_lst[j].append(te_metrics[j])

    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    weave_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    weave_perf.to_parquet(filename_weave + "_performance", index=0)      
    # loadable via ...
    # weave_perf = pd.read_parquet(filename_weave + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr = []
    va_aggr = []
    te_aggr = []
    results = [tr_aggr, va_aggr, te_aggr]

    for i in range(len(results)):
        # avg_auc_roc
        results[i].append(round(np.average(weave_perf["auc_roc"][i]), 5))
        # std_auc_roc
        results[i].append(round(np.std(weave_perf["auc_roc"][i]), 5))
        # top_roc_auc
        results[i].append(round(np.max(weave_perf["auc_roc"][i]), 5))

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "Weave"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_weave
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": weave_lr_ls[best_results_weave['lr']],
                       "L2 regularization": weave_l2_ls[best_results_weave['l2']],
                       "gnn_hidden_feats": weave_gnn_hidden_feats_ls[best_results_weave['gnn_hidden_feats']],
                       "num_gnn_layers": weave_num_gnn_layers_ls[best_results_weave['num_gnn_layers']],
                       "graph_feats": weave_graph_feats_ls[best_results_weave['graph_feats']],
                       "other": "default"
                       }

    va_performance = va_aggr
    te_performance = te_aggr

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        info = [[model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]]
        df = pd.DataFrame(info, columns=cls)
        # save
        print("saving information")
        df.to_parquet(overview_df_filename, index=0)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model
        new_row = {}
        keys = cls
        values = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]
        for key in keys:
            for value in values:
                new_row[key] = value
                values.remove(value)
                break 
        overview_df = overview_df.append(new_row, ignore_index=True)
        # save
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)
        overview_df.to_parquet(overview_df_filename, index=0)        
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())

performing training and optimization
starting hyperparameter optimization
early stopping at epoch 51 with patience set to 50
early stopping at epoch 50 with patience set to 50
early stopping at epoch 88 with patience set to 50
early stopping at epoch 58 with patience set to 50
early stopping at epoch 140 with patience set to 50
early stopping at epoch 132 with patience set to 50
early stopping at epoch 132 with patience set to 50
early stopping at epoch 102 with patience set to 50
early stopping at epoch 185 with patience set to 50
early stopping at epoch 53 with patience set to 50
early stopping at epoch 70 with patience set to 50
early stopping at epoch 51 with patience set to 50
early stopping at epoch 169 with patience set to 50
early stopping at epoch 56 with patience set to 50
early stopping at epoch 126 with patience set to 50
early stopping at epoch 176 with patience set to 50
early stopping at epoch 147 with patience set to 50
early stopping at epoch 147 with patience set to 5

## NFP

In [None]:
# nfp setup
model_name = "nfp" 
hyper_space = hspace_gnns[model_name]
filename_nfp = "nfp_opt"

if file_exists(filename_nfp+".sav") and file_exists(filename_nfp+"_performance"):
    print("no training and optimization needed, everything can be loaded")
    
    # model
    print('\n')
    print("best NFP model is:")
    loaded_model = pickle.load(open(filename_nfp+".sav", 'rb'))
    print(loaded_model)

    # performance
    perf_df = pd.read_parquet(filename_nfp + "_performance")
    print('\n')
    print(f"mean ROC-AUC across {repetitions} different seeds")
    print(f"train: {round(np.average(perf_df['auc_roc'][0]), 5)} | validation: {round(np.average(perf_df['auc_roc'][1]), 5)}, test: {round(np.average(perf_df['auc_roc'][2]), 5)}")

    # overview
    print('\n')
    print("experimentation overview:")
    # load 
    overview_df = pd.read_parquet(overview_df_filename)
    print(overview_df.to_markdown())

else:
    print("performing training and optimization")

    # hyperparameter optimization
    print("starting hyperparameter optimization")
    trials = Trials()
    best_results_nfp = fmin(
        hyper_opt,
        hyper_space,
        algo=tpe.suggest,
        max_evals=OPT_ITERS,
        trials=trials,
    )

    # the values in best_results_nfp are used as positions in the nfp_*_ls
    # candidate lists, so every entry is looked up before it is reported
    best_hp_parts = [
        f"learning rate {nfp_lr_ls[best_results_nfp['lr']]}",
        f"L2 regularization {nfp_l2_ls[best_results_nfp['l2']]}",
        f"hidden_feats {nfp_hidden_feats_ls[best_results_nfp['hidden_feats']]}",
        f"dropout {nfp_dropout_ls[best_results_nfp['dropout']]}",
        f"predictor_hidden_size {nfp_predictor_hidden_size_ls[best_results_nfp['predictor_hidden_size']]}",
        f"predictor_dropout {nfp_predictor_dropout_ls[best_results_nfp['predictor_dropout']]}",
    ]
    text = "the best GNN hyperparameters are: " + " | ".join(best_hp_parts)
    print(text)
    
    # reset random seed before the model is built, so that any random weight
    # initialisation in the constructor is covered by the seed as well; this is
    # the same order (seed -> model -> optimizer) that the seed-repetition loop
    # further down uses for its seed-0 run
    seed=0
    torch.manual_seed(seed) 
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed) 

    # reconstruct best model
    model_hp_opt = NFPredictor(in_feats = node_feat_dim,
                               hidden_feats = nfp_hidden_feats_ls[best_results_nfp['hidden_feats']],
                               dropout = nfp_dropout_ls[best_results_nfp['dropout']],
                               predictor_hidden_size = nfp_predictor_hidden_size_ls[best_results_nfp['predictor_hidden_size']],
                               predictor_dropout = nfp_predictor_dropout_ls[best_results_nfp['predictor_dropout']]
                               )


    print("reconstructing and training model on best hyperparameters") 
    # reconstruct best optimizer
    best_opt = optim.Adam(model_hp_opt.parameters(), lr=nfp_lr_ls[best_results_nfp["lr"]], weight_decay=nfp_l2_ls[best_results_nfp["l2"]])

    # training
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
    model_hp_opt.to(device)

    #  early stopping
    # increase_idx: epoch of the last validation improvement
    # best_auc: best validation ROC-AUC seen so far
    increase_idx = 0
    best_auc = 0
    # fallback so that best_model always belongs to THIS model: without it, a run
    # whose validation AUC never exceeds best_auc would leave best_model undefined
    # or, worse, pointing at a model left over from a previously executed cell
    best_model = copy.deepcopy(model_hp_opt)
    
    # training
    for j in range(epochs):
        
        # training
        train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

        # roc auc
        # NOTE(review): train_loss, train_auc and loss are not read anywhere in
        # this cell; the calls are kept so the sequence of operations per epoch
        # stays exactly as before
        train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
        valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

        loss = 1 - valid_auc
            
        # checks if there was an update
        if valid_auc > best_auc:
            increase_idx = j
            # update best validation auc
            best_auc = valid_auc
            # keep a snapshot of the model at its best validation epoch
            best_model = copy.deepcopy(model_hp_opt)
        # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
        # stop once `patience` epochs have passed without improvement
        if j - increase_idx >= patience:
            print(f"early stopping at epoch {j} with patience set to {patience}")
            break
    
    # save hyperparameters (the raw result dict returned by the optimization)
    with open(filename_nfp+"_hps", 'wb') as f:
        pickle.dump(best_results_nfp, f)
    # loadable via ...
    # with open(filename_nfp+"_hps", 'rb') as f:
    #    loaded_dict = pickle.load(f)

    # save best model
    # the context manager closes (and thereby flushes) the file; a bare
    # open(...) passed to pickle.dump leaves the handle open, so the write is
    # only completed whenever the handle happens to be garbage collected
    with open(filename_nfp+".sav", 'wb') as f:
        pickle.dump(best_model, f)
    # loadable via ...
    # with open(filename_nfp+".sav", 'rb') as f:
    #    best_model = pickle.load(f)

    # repetitions for performance on different seeds
    # one accumulator list per metric and split; each list receives one value
    # per repetition. Position order within a split:
    # tn, fp, fn, tp, se, sp, acc, mcc, auc_prc, auc_roc
    tr_lst = [[] for _ in range(10)]
    va_lst = [[] for _ in range(10)]
    te_lst = [[] for _ in range(10)]

    # individual names refer to the very same list objects held in *_lst
    tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc = tr_lst
    va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc = va_lst
    te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc = te_lst

    # repetitions
    # retrain the best configuration `repetitions` times, each with its own
    # seed, and collect the metrics of the best-validation snapshot per run
    print("performing repetitions on different seeds")
    for i in range(repetitions):
        # first replicate model with initial seed
        if i == 0:
            seed = 0
        else:
            # NOTE(review): drawn from NumPy's global RNG; the seeds are only
            # reproducible if np.random was seeded earlier in the notebook,
            # which is not visible from this cell
            seed = np.random.randint(1, 999999) # all but initial random seed of 0
        
        torch.manual_seed(seed) 
        if torch.cuda.is_available():
            torch.cuda.manual_seed(seed) 
                
        # reconstruct best model
        model_hp_opt = NFPredictor(in_feats = node_feat_dim,
                                hidden_feats = nfp_hidden_feats_ls[best_results_nfp['hidden_feats']],
                                dropout = nfp_dropout_ls[best_results_nfp['dropout']],
                                predictor_hidden_size = nfp_predictor_hidden_size_ls[best_results_nfp['predictor_hidden_size']],
                                predictor_dropout = nfp_predictor_dropout_ls[best_results_nfp['predictor_dropout']]
                                )


        # reconstruct best optimizer
        best_opt = optim.Adam(model_hp_opt.parameters(), lr=nfp_lr_ls[best_results_nfp["lr"]], weight_decay=nfp_l2_ls[best_results_nfp["l2"]])

        # training
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
        model_hp_opt.to(device)

        #  early stopping
        # increase_idx: epoch of the last validation improvement
        # best_auc: best validation ROC-AUC seen so far in this repetition
        increase_idx = 0
        best_auc = 0
        # reset the snapshot for every repetition: otherwise a run whose
        # validation AUC never exceeds best_auc would silently be scored with
        # the best_model of the previous repetition
        best_model = copy.deepcopy(model_hp_opt)
        
        # training
        for j in range(epochs):
            
            # training
            train_loss = train_epoch(model_hp_opt, device, train_loader, best_opt, loss_fn)

            # roc auc
            train_auc = eval_epoch(model_hp_opt, device, train_loader, evaluator)
            valid_auc = eval_epoch(model_hp_opt, device, valid_loader, evaluator)

            loss = 1 - valid_auc
                
            # checks if there was an update
            if valid_auc > best_auc:
                increase_idx = j
                # update best validation auc
                best_auc = valid_auc
                # keep a snapshot of the model at its best validation epoch
                best_model = copy.deepcopy(model_hp_opt)
            # print(f"epoch {j}, valid_auc {valid_auc}, best_auc {best_auc}, epochs with no impr {j - increase_idx}")
            # stop once `patience` epochs have passed without improvement
            if j - increase_idx >= patience:
                print(f"early stopping at epoch {j} with patience set to {patience}")
                break        

        # training metrics calc
        tr_metrics = list(eval_epoch_statistical(best_model, device, train_loader, evaluator))

        # validation metric calc
        va_metrics = list(eval_epoch_statistical(best_model, device, valid_loader, evaluator))

        # test metric calc
        te_metrics = list(eval_epoch_statistical(best_model, device, test_loader, evaluator))

        # append this repetition's value of every metric to its accumulator;
        # position k in the metric lists matches position k in tr_lst/va_lst/te_lst
        for k in range(len(tr_lst)):               
            tr_lst[k].append(tr_metrics[k])
            va_lst[k].append(va_metrics[k])
            te_lst[k].append(te_metrics[k])

    # one row per split; every metric cell holds the list of per-repetition values
    metric_cls = ["tn", "fp", "fn", "tp", "se", "sp", "acc", "mcc", "auc_prc", "auc_roc"] 
    metrics_data = [["train", tr_tns, tr_fps, tr_fns, tr_tp, tr_se, tr_sp, tr_acc, tr_mcc, tr_auc_prc, tr_auc_roc],
                    ["validation", va_tns, va_fps, va_fns, va_tp, va_se, va_sp, va_acc, va_mcc, va_auc_prc, va_auc_roc],
                    ["test", te_tns, te_fps, te_fns, te_tp, te_se, te_sp, te_acc, te_mcc, te_auc_prc, te_auc_roc]]
    nfp_perf = pd.DataFrame(metrics_data, columns = ["split"] + metric_cls)
    
    # save performance df
    # `index` is documented as a boolean: False means "do not write the index".
    # The integer 0 is not guaranteed to be treated as False by the parquet
    # engine, so pass the boolean explicitly.
    nfp_perf.to_parquet(filename_nfp + "_performance", index=False)      
    # loadable via ...
    # nfp_perf = pd.read_parquet(filename_nfp + "_performance")

    # add model info 
    cols = ["avg_auc_roc", "std_auc_roc", "top_roc_auc"]
    tr_aggr, va_aggr, te_aggr = [], [], []
    results = [tr_aggr, va_aggr, te_aggr]

    # row i of nfp_perf is split i (train, validation, test); for each split
    # store [mean, standard deviation, maximum] of its per-repetition ROC-AUC
    # values, each rounded to 5 decimals
    for i, split_aggr in enumerate(results):
        split_auc_roc = nfp_perf["auc_roc"][i]
        split_aggr.extend([
            round(np.average(split_auc_roc), 5),
            round(np.std(split_auc_roc), 5),
            round(np.max(split_auc_roc), 5),
        ])

    cls = ["model_type", "filename", "GPU_accelerator", "RAM", "data_features", "hyperparameters", "train_performance_ROC-AUC_avg/std/max", "valid_performance_ROC-AUC_avg/std/max", "test_performance_ROC-AUC_avg/std/max"]
    model_type = "NFP"
    data_features = f"molecule graphs with {node_feat_dim} node feats" if model_name in ["gcn", "gat", "nfp"] else f"molecule graphs with {node_feat_dim} node feats and {edge_feat_dim} edge feats"
    filename = filename_nfp
    GPU_info = !nvidia-smi -L
    GPU_accelerator = re.search(r"\: (.*?)\(", str(GPU_info)).group(1)
    RAM = f"{round(virtual_memory().total / 1e9, 2)} GB"
    
    hyperparameters = {"learning rate": nfp_lr_ls[best_results_nfp['lr']],
                       "L2 regularization": nfp_l2_ls[best_results_nfp['l2']],
                       "hidden_feats": nfp_hidden_feats_ls[best_results_nfp['hidden_feats']],
                       "dropout": nfp_dropout_ls[best_results_nfp['dropout']],
                       "predictor_hidden_size": nfp_predictor_hidden_size_ls[best_results_nfp['predictor_hidden_size']],
                       "predictor_dropout": nfp_predictor_dropout_ls[best_results_nfp['predictor_dropout']],
                       "other": "default"
                       }


    tr_performance = tr_aggr
    va_performance = va_aggr
    te_performance = te_aggr

    # values of this model's overview row, in the same order as the columns in cls
    model_info = [model_type, filename, GPU_accelerator, RAM, data_features, hyperparameters, tr_performance, va_performance, te_performance]

    # does overview table exist?
    if not file_exists(overview_df_filename):
        # create dataframe with model info
        df = pd.DataFrame([model_info], columns=cls)
        # save
        print("saving information")
        # index=False: `index` is a boolean flag, do not rely on 0 being read as False
        df.to_parquet(overview_df_filename, index=False)
        print("model information added to experimentation overview")
        print(df.to_markdown())
    else:
        # load 
        overview_df = pd.read_parquet(overview_df_filename)

        # add row for model: pair every column name with its value
        new_row = dict(zip(cls, model_info))
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # replacement and also works on older pandas versions
        overview_df = pd.concat([overview_df, pd.DataFrame([new_row])], ignore_index=True)
        # save
        # hyperparameter dicts differ in their keys between model types, so the
        # column is stored as text
        overview_df["hyperparameters"]= overview_df["hyperparameters"].astype(str)
        overview_df.to_parquet(overview_df_filename, index=False)        
        print("model information added to experimentation overview")
        print(overview_df.to_markdown())