In [1]:
import pandas as pd
import numpy as np
import dgl
import createGraph
from importlib import reload
import torch
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import architecture
import training_loop
from bayes_opt import BayesianOptimization
from torch.utils.data import DataLoader 
from bayes_opt.util import UtilityFunction


In [2]:
# Shift every node index by a fixed offset (used to re-index the second graph's nodes so they follow the first graph's)
def offset_node_batches(node_batches, offset):
    """Return a shifted copy of ``node_batches``.

    Args:
        node_batches: Iterable of iterables of node indices, one inner
            iterable per graph.
        offset: Value added to every node index.

    Returns:
        A new list of lists with the same nesting as ``node_batches`` in
        which each index has been increased by ``offset``. The input is
        not modified.
    """
    shifted_batches = []
    for batch in node_batches:
        shifted_batches.append([node_idx + offset for node_idx in batch])
    return shifted_batches

In [3]:
# --- Training split -------------------------------------------------------
# Read the graphs and their label dictionary straight from the DGL binary file
train_g, train_labels_dict = dgl.load_graphs('upsampledTrain.bin')
# Graph-level labels are stored under the 'gLabel' key
train_y = train_labels_dict['gLabel']
# Running total of node counts: entry i = number of nodes in graphs 0..i
train_cumsum_nodes = np.cumsum([graph.number_of_nodes() for graph in train_g])

# --- Validation split -----------------------------------------------------
# NOTE(review): the validation graphs come from a file named 'nonupsampledTest.bin' —
# confirm this is the intended validation data and not a held-out test set.
val_g, val_labels_dict = dgl.load_graphs('nonupsampledTest.bin')
val_y = val_labels_dict['gLabel']
val_cumsum_nodes = np.cumsum([graph.number_of_nodes() for graph in val_g])

In [4]:
# Build, for every graph in each split, the list of node indices it occupies.
# Graph i owns the half-open range [cumsum[i - 1], cumsum[i]); the first graph starts at 0.
train_node_batches = [
    list(range(start, stop))
    for start, stop in zip([0, *train_cumsum_nodes[:-1]], train_cumsum_nodes)
]
val_node_batches = [
    list(range(start, stop))
    for start, stop in zip([0, *val_cumsum_nodes[:-1]], val_cumsum_nodes)
]

In [5]:
# The final cumulative sum is the total node count of the training split
num_train_nodes = train_cumsum_nodes[-1]
# Validation node indices are shifted by that count so they follow the training nodes
offset_val_node_batches = offset_node_batches(val_node_batches, num_train_nodes)
# Batch every training and validation graph (in that order) into one graph
combined_g = dgl.batch([*train_g, *val_g])
# Per-graph node index lists for the batched graph: training graphs first, then validation
combined_node_batches = train_node_batches + offset_val_node_batches
# Labels concatenated in the same train-then-val order
combined_y = torch.cat((train_y, val_y))
# Positions of the training graphs within the combined labels (an index list, not a boolean mask)
train_mask = [*range(len(train_y))]
# Positions of the validation graphs, which come directly after the training ones
val_mask = [*range(len(train_y), len(train_y) + len(val_y))]

In [6]:
import architecture
import training_loop

# Re-execute both project modules so that edits made to their source files
# are picked up without restarting the kernel.
for _module in (architecture, training_loop):
    reload(_module)

def train_with_hyperparams(lr, num_batches, max_epochs, lamb_beta1, lamb_beta2, lamb_eps, lamb_wd, lookahead_k, lookahead_alpha,dropout_rate,l1_lambda):
    """Objective function for the hyperparameter search: train one model and return its score.

    The arguments arrive as raw floats sampled from the search space and are
    converted before use:

    * ``lr``, ``lamb_eps``, ``lamb_wd`` and ``l1_lambda`` are base-10
      exponents; the value actually used is ``10 ** x``.
    * ``num_batches``, ``max_epochs`` and ``lookahead_k`` are truncated to
      ``int``.
    * ``lamb_beta1``, ``lamb_beta2``, ``lookahead_alpha`` and
      ``dropout_rate`` are passed through unchanged.

    A fresh ``architecture.GCN`` is built on every call and trained on the
    notebook-level ``combined_g`` / ``combined_node_batches`` / ``combined_y``
    with validation enabled and testing disabled.

    Returns:
        Whatever ``training_loop.train`` returns; this notebook treats it as
        the best validation balanced accuracy reached during the run.
    """
    # Input width is the second dimension of the node feature tensor
    in_feats = combined_g.ndata['feat'].shape[1]
    # The two 30s are presumably layer sizes — confirm against architecture.GCN
    gcn = architecture.GCN(in_feats, 30, 30, dropout_rate)

    return training_loop.train(
        g=combined_g,
        node_batches=combined_node_batches,
        model=gcn,
        labels=combined_y,
        train_mask=train_mask,
        val_mask=val_mask,
        validate=True,
        test=False,
        # Log-scale parameters are exponentiated here
        init_lr=10 ** lr,
        # Count-like parameters must be integers
        num_batches=int(num_batches),
        max_epochs=int(max_epochs),
        lamb_beta1=lamb_beta1,
        lamb_beta2=lamb_beta2,
        lamb_eps=10 ** lamb_eps,
        lamb_wd=10 ** lamb_wd,
        lookahead_k=int(lookahead_k),
        lookahead_alpha=lookahead_alpha,
        l1_lambda=10 ** l1_lambda,
    )


# Search space handed to the optimizer as (lower, upper) bounds.
# Entries marked "log10" are exponents: train_with_hyperparams uses 10 ** value.
# Entries marked "int" are truncated with int() inside train_with_hyperparams.
hyperparams_bounds = {
    # log10 of the learning rate
    'lr': (-5, -0.5),
    # int: number of batches the training set is divided into
    'num_batches': (1, 125),
    # int: maximum number of passes through the data set
    'max_epochs': (50, 100),
    # decay rate of the first moment estimate
    'lamb_beta1': (0.8, 0.999),
    # decay rate of the second moment estimate
    'lamb_beta2': (0.8, 0.999),
    # log10 of the small constant added to the denominator for numerical stability
    'lamb_eps': (-8, -4),
    # log10 of the weight decay used to limit overfitting
    'lamb_wd': (-5, -1),
    # int: number of fast-weight steps per slow-weight update
    'lookahead_k': (3, 10),
    # slow-weight step size
    'lookahead_alpha': (0.3, 0.7),
    # dropout rate
    'dropout_rate': (0.001, 0.8),
    # log10 of the L1 regularization coefficient
    'l1_lambda': (-5, -3),
}

# Bayesian optimization over the search space; random_state makes the sampled points repeatable
optimizer = BayesianOptimization(
    f=train_with_hyperparams,
    pbounds=hyperparams_bounds,
    random_state=50,
)

# Expected Improvement acquisition function; a larger 'xi' favours exploration over exploitation
utility = UtilityFunction(kind="ei", xi=0.01)

# Gaussian Process settings: normalise targets and use a small noise term
optimizer.set_gp_params(normalize_y=True, alpha=1e-6)

# Maximize the value returned by train_with_hyperparams (validation balanced accuracy):
# 20 randomly sampled points first, then 10 Bayesian optimization steps
optimizer.maximize(
    init_points=20,
    n_iter=10,
    acquisition_function=utility,
)


|   iter    |  target   | dropou... | l1_lambda | lamb_b... | lamb_b... | lamb_eps  |  lamb_wd  | lookah... | lookah... |    lr     | max_ep... | num_ba... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------


  assert input.numel() == input.storage().size(), (


Best scores in iteration:  {'epoch': 7, 'train_loss': 0.6838671565055847, 'train_acc': 0.5764989256858826, 'train_bal_acc': 0.5751935543789697, 'train_recall': 0.6452344931921331, 'train_precision': 0.57508848811731, 'train_f1': 0.6081454415827466, 'train_auc': 0.5751935543789697, 'val_loss': 0.6808741092681885, 'val_acc': 0.5611045956611633, 'val_bal_acc': 0.5049006942788244, 'val_recall': 0.6427378964941569, 'val_precision': 0.7070707070707071, 'val_f1': 0.6733712286838653, 'val_auc': 0.5049006942788246}
| [0m1        [0m | [0m0.5049   [0m | [0m0.3962   [0m | [0m-4.544   [0m | [0m0.8508   [0m | [0m0.8789   [0m | [0m-6.491   [0m | [0m-1.014   [0m | [0m0.4633   [0m | [0m8.403    [0m | [0m-1.578   [0m | [0m65.5     [0m | [0m43.97    [0m |
Best scores in iteration:  {'epoch': 9, 'train_loss': 0.688248872756958, 'train_acc': 0.5011317133903503, 'train_bal_acc': 0.5034049820718229, 'train_recall': 0.38142965204236007, 'train_precision': 0.5138198955547064, 'train_

KeyboardInterrupt: 

In [None]:
# Pick up any edits made to the project modules since they were last loaded
reload(architecture)
reload(training_loop)

# NOTE(review): train_with_hyperparams builds GCN with four positional arguments
# (the fourth being dropout_rate) but only three are given here — confirm the
# constructor provides a default for the missing one.
model = architecture.GCN(combined_g.ndata['feat'].shape[1], 30, 30)

# Single training run using training_loop.train's own defaults for every
# hyperparameter that is not passed explicitly
training_loop.train(
    g=combined_g,
    node_batches=combined_node_batches,
    model=model,
    labels=combined_y,
    train_mask=train_mask,
    val_mask=val_mask,
    validate=True,
    test=False,
)