# Run PncA WandB Sweep

In [1]:
from IPython.display import display
import os

# Show whether this kernel was started over SSH (SSH_CONNECTION is set in the
# environment) or on the local machine.
display("Running via SSH" if "SSH_CONNECTION" in os.environ else "Running locally")
    
import sys
import os

# Locate the tb-pnca-gnn checkout so that `from src import ...` resolves.
# The repository lives at a different absolute location on the SSH host than
# on the local machine. The absolute paths are written directly: wrapping them
# in os.path.join('..', <absolute path>) had no effect, because join discards
# every component that precedes an absolute one.
# NOTE(review): these are machine-specific absolute paths; consider making
# them configurable.
if "SSH_CONNECTION" in os.environ:
    path = '/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn'
else:
    path = '/Users/dylandissanayake/Desktop/DPhil/Comp Disc/Repositories/TB-PNCA-GNN'

# Normalise once and test membership on exactly the string that gets appended,
# so re-running the cell can never add a second entry to sys.path.
path = os.path.abspath(path)
if path not in sys.path:
    sys.path.append(path)

import datetime
import random

import numpy as np
import pandas as pd

import torch
from torch_geometric.data import Data

import wandb

import warnings
# NOTE(review): with no category or module argument this silences every Python
# warning for the rest of the session, including deprecation warnings from the
# libraries imported above. Consider narrowing the filter.
warnings.filterwarnings('ignore')

from src import run_model, protein_graph, gcn_model, evaluation

# Enable IPython's autoreload extension in mode 2: modules are reloaded before
# each cell executes, so edits to the project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Import `src` and register it with autoreload.
%aimport src

# Last expression of the cell: displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

'Running locally'

False

In [2]:
# Load the pre-built dataset from a path relative to the notebook's working directory.
# NOTE(review): torch.load unpickles the file, so only load files from a trusted source.
# NOTE(review): newer PyTorch releases default to weights_only=True, which may reject
# pickled non-tensor objects; confirm this still loads on the installed version.
full_dataset = torch.load('datasets/full_real_dataset.pth')

### Set Up Params and Sweep Config

In [3]:
# Seed every random number generator this notebook relies on so that a
# Restart Kernel -> Run All starts from the same random state each time.
seed = 42
np.random.seed(seed)
random.seed(seed)
# torch was previously left unseeded even though the model is a torch model;
# torch.manual_seed seeds the generators on all devices.
torch.manual_seed(seed)

# Logging params. Original author's note: only used for wandb metrics.
n_samples = len(full_dataset)
# cutoff_distance is sampled per run by the sweep (see sweep_config); the earlier fixed value was 6.3.

# GCN params that stay fixed across every sweep run.
num_node_features = 12
batch_size = 256
# hidden_channels, learning_rate and wd (weight decay) are sampled per run by the
# sweep; the earlier fixed values were 64, 0.001 and 5e-5 respectively.
# Epoch budget handed to the model; the recorded runs below stop earlier via early stopping.
epochs = 1500

# Amino-acid sequence string; presumably the wild-type ("wt") PncA sequence (TODO confirm).
# It is not referenced by the other cells visible in this part of the notebook.
wt_seq = 'MRALIIVDVQNDFCEGGSLAVTGGAALARAISDYLAEAADYHHVVATKDFHIDPGDHFSGTPDYSSSWPPHCVSGTPGADFHPSLDTSAIEAVFYKGAYTGAYSGFEGVDENGTPLLNWLRQRGVDEVDVVGIATDHCVRQTAEDAVRNGLATRVLVDLTAGVSADTTVAALEEMRTASVELVCS'

In [4]:
# Random-search sweep that maximises the logged 'Test Accuracy' metric.
metric = {'name': 'Test Accuracy', 'goal': 'maximize'}


def _choice(*options):
    """Search-space entry that picks one value from a fixed list."""
    return {'values': list(options)}


def _uniform(low, high):
    """Search-space entry using the 'uniform' distribution between low and high."""
    return {'distribution': 'uniform', 'min': low, 'max': high}


def _log_uniform(low, high):
    """Search-space entry using the 'log_uniform_values' distribution between low and high."""
    return {'distribution': 'log_uniform_values', 'min': low, 'max': high}


# History of the search space, oldest first. Stages are applied in order and a
# stage overrides only the parameters it names, so the last stage to mention a
# parameter decides the range that ends up in sweep_config.
_sweep_stages = [
    ('first sweep', {
        'hidden_channels': _choice(32, 64, 128, 256),
        'weight_decay': _log_uniform(1e-8, 1e-2),
        'dropout': _choice(0.2, 0.4, 0.5, 0.6, 0.8),
        'cutoff_distance': _uniform(3.5, 10),
        'learning_rate': _log_uniform(1e-8, 1e-1),
    }),
    ('second / third sweep', {
        'hidden_channels': _choice(64, 128, 192, 256, 320, 384),
        'weight_decay': _log_uniform(5e-7, 1e-2),
        'dropout': _choice(0.4, 0.5, 0.6, 0.8),
        'learning_rate': _log_uniform(1e-5, 1e-2),
    }),
    ('fourth / fifth sweep', {
        'hidden_channels': _choice(128, 192, 256, 320),
        'weight_decay': _log_uniform(1e-5, 1e-3),
        'dropout': _choice(0.4, 0.5, 0.6, 0.8),
        'learning_rate': _log_uniform(5e-5, 5e-2),
    }),
    ('sixth sweep', {
        'learning_rate': _log_uniform(1e-6, 5e-3),
    }),
    ('seventh sweep', {
        'learning_rate': _log_uniform(5e-6, 5e-4),
    }),
    ('eighth sweep, with cutoff varying', {
        'learning_rate': _log_uniform(5e-6, 1e-3),
        'cutoff_distance': _uniform(5.5, 14),
    }),
]

parameters_dict = {}
for _stage_name, _stage_overrides in _sweep_stages:
    parameters_dict.update(_stage_overrides)

# sweep_config refers to the metric and parameters_dict objects themselves,
# not to copies of them.
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}



In [5]:
# Pretty-print the assembled sweep configuration to check the final search space.
# NOTE(review): this import would normally sit with the other imports in the first cell.
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Test Accuracy'},
 'parameters': {'cutoff_distance': {'distribution': 'uniform',
                                    'max': 14,
                                    'min': 5.5},
                'dropout': {'values': [0.4, 0.5, 0.6, 0.8]},
                'hidden_channels': {'values': [128, 192, 256, 320]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.001,
                                  'min': 5e-06},
                'weight_decay': {'distribution': 'log_uniform_values',
                                 'max': 0.001,
                                 'min': 1e-05}}}


### Define Training Loop

In [6]:
# W&B project name; passed to wandb.agent when the sweep agent is started.
project = "pnca-singletons-sweep"

In [7]:
# Attach to an already-registered sweep by its ID. To register a new sweep from
# `sweep_config` instead, uncomment the wandb.sweep call and drop the hard-coded ID.
# NOTE(review): while the ID is hard-coded, no visible cell sends the local
# `sweep_config` to W&B, so edits to it do not change this sweep.
# sweep_id = wandb.sweep(sweep_config, project=project)
sweep_id = 'nc8eahq7'

In [8]:
def sweep_run():
    """Run one trial of the W&B sweep.

    Opens a W&B run, reads the hyperparameters sampled for this trial from
    ``run.config`` and forwards them, together with the fixed notebook-level
    settings (``batch_size``, ``num_node_features``, ``epochs`` and
    ``full_dataset``), to ``run_model.pnca_simpleGCN``.

    Takes no arguments and returns nothing; it is the callable handed to
    ``wandb.agent`` in this notebook.
    """
    with wandb.init() as run:
        sampled = run.config

        model_kwargs = {
            # No sequence dict is supplied (an earlier variant passed `sequences_dict`).
            'sequences': None,
            'self_loops': False,
            'cutoff_distance': sampled.cutoff_distance,
            # An earlier variant used 'none' here.
            'edge_weight_func': '1-(dist/cutoff)',
            'batch_size': batch_size,
            'num_node_features': num_node_features,
            'hidden_channels': sampled.hidden_channels,
            'learning_rate': sampled.learning_rate,
            'wd': sampled.weight_decay,
            'dropout': sampled.dropout,
            'lr_scheduling': False,
            'epochs': epochs,
            'dataset': full_dataset,
            'normalise_ews': True,
            # NOTE(review): these flags are interpreted inside run_model; their
            # exact meaning is not visible from this notebook.
            'wandb_params': {'use_wandb': False, 'sweep': True},
        }
        model = run_model.pnca_simpleGCN(**model_kwargs)

        # Saving the returned model is currently disabled; to re-enable:
        # os.makedirs(f'saved_models/carter_ds_aug/{project}/{sweep_id}', exist_ok=True)
        # torch.save(model, f'saved_models/carter_ds_aug/{project}/{sweep_id}/{run.name}')

In [9]:
# Start a sweep agent in this kernel: it calls sweep_run once per trial, for at most 10 trials.
wandb.agent(sweep_id, sweep_run, project = project, count=10)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Agent Starting Run: l3gb54ch with config:
[34m[1mwandb[0m: 	cutoff_distance: 6.214923452698586
[34m[1mwandb[0m: 	dropout: 0.8
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 9.724685079274246e-06
[34m[1mwandb[0m: 	weight_decay: 0.00011731121453622746
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdylan-home[0m. Use [1m`wandb login --relogin`[0m to force relogin


Adjusting edge index and attaching edge weights for cutoff distance 6.214923452698586
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.4698, Test Acc: 0.4850, Train Loss: 0.6953, Test Loss: 0.6944
Epoch: 020, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6909, Test Loss: 0.6928
Epoch: 030, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6913, Test Loss: 0.6932
20 epochs passed without 0 test loss improvement. 
Early stopping triggered.


0,1
Test Accuracy,▂▂▂▂▂▂▂▂▂▂▂▂▆▁▅██████████████████████
Test F1,▁▁▁▁▁▁▁▁▁▁▁▁▂▇███████████████████████
Test Loss,█▇▇▇▇▆▆▆▅▄▄▃▂▂▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
Test Sensitivity,▁▁▁▁▁▁▁▁▁▁▁▁▁▇███████████████████████
Test Specificity,█████████████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▃▄▇██████████████████████
Train F1,▁▁▁▁▁▁▁▁▁▁▁▁▂████████████████████████
Train Loss,█▇▇▇▇▇▇▆▆▅▅▄▄▃▃▂▂▂▂▁▂▂▁▂▂▂▂▂▂▂▂▁▁▂▂▂▂
Train Sensitivity,▁▁▁▁▁▁▁▁▁▁▁▁▁▇███████████████████████
Train Specificity,█████████████▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Test Accuracy,0.515
Test F1,0.67987
Test Loss,0.69317
Test Sensitivity,1.0
Test Specificity,0.0
Train Accuracy,0.53017
Train F1,0.69296
Train Loss,0.69139
Train Sensitivity,1.0
Train Specificity,0.0


[34m[1mwandb[0m: Agent Starting Run: duvxit9x with config:
[34m[1mwandb[0m: 	cutoff_distance: 8.609485151933017
[34m[1mwandb[0m: 	dropout: 0.6
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 9.300780261332198e-06
[34m[1mwandb[0m: 	weight_decay: 2.0673408617606565e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 8.609485151933017
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.4698, Test Acc: 0.4850, Train Loss: 0.7801, Test Loss: 0.7675
Epoch: 020, Train Acc: 0.5366, Test Acc: 0.5400, Train Loss: 0.6937, Test Loss: 0.6928
Epoch: 030, Train Acc: 0.5280, Test Acc: 0.5050, Train Loss: 0.6889, Test Loss: 0.6912
Epoch: 040, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6890, Test Loss: 0.6924
20 epochs passed without 0 test loss improvement. 
Early stopping triggered.


0,1
Test Accuracy,▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▄▅▇█▃▁▂▃▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄
Test F1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▃▅▆▇▇██████████████████
Test Loss,█▇▆▅▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Test Sensitivity,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▄▅▇██████████████████
Test Specificity,███████████████████▇▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Accuracy,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▄▅▅█▄▂▂▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
Train F1,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▃▃▄▆▇▇██████████████████
Train Loss,█▇▆▅▄▄▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Train Sensitivity,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▃▅▇▇█████████████████
Train Specificity,███████████████████▇▄▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Test Accuracy,0.515
Test F1,0.67987
Test Loss,0.69147
Test Sensitivity,1.0
Test Specificity,0.0
Train Accuracy,0.53017
Train F1,0.69296
Train Loss,0.68814
Train Sensitivity,1.0
Train Specificity,0.0


[34m[1mwandb[0m: Agent Starting Run: n6cn0ty0 with config:
[34m[1mwandb[0m: 	cutoff_distance: 13.037331837590976
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	hidden_channels: 320
[34m[1mwandb[0m: 	learning_rate: 0.00011332735851571204
[34m[1mwandb[0m: 	weight_decay: 1.0711051373788378e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 13.037331837590976
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.5345, Test Acc: 0.5100, Train Loss: 0.7187, Test Loss: 0.7407
Epoch: 020, Train Acc: 0.5819, Test Acc: 0.5950, Train Loss: 0.6824, Test Loss: 0.6799
Epoch: 030, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.7847, Test Loss: 0.8395
Epoch: 040, Train Acc: 0.5991, Test Acc: 0.5650, Train Loss: 0.6153, Test Loss: 0.6508
Epoch: 050, Train Acc: 0.5776, Test Acc: 0.5900, Train Loss: 0.7203, Test Loss: 0.7249
Epoch: 060, Train Acc: 0.6832, Test Acc: 0.6550, Train Loss: 0.6027, Test Loss: 0.6322
Epoch: 070, Train Acc: 0.5560, Test Acc: 0.5200, Train Loss: 0.6925, Test Loss: 0.7664
Epoch: 080, Train Acc: 0.7177, Test Acc: 0.6500, Train Loss: 0.5599, Test Loss: 0.6075
Epoch: 090, Train Acc: 0.7306, Test Acc: 0.6600, Train Loss: 0.5552, Test Loss: 0.6019
Epoch: 100, Train Acc: 0.6789, Test Acc: 0.6550, Train Loss: 0.6125, Test Loss