# Run PncA WandB Sweep

In [1]:
from IPython.display import display
import os

# Announce whether this kernel was started from an SSH session (the
# SSH_CONNECTION environment variable is present) or on the local machine.
display("Running via SSH" if "SSH_CONNECTION" in os.environ else "Running locally")
    
import sys
import os

# Make the project checkout importable so that `from src import ...` resolves.
# Both locations are absolute paths and are used as-is: os.path.join drops
# every component that precedes an absolute one, so prefixing '..' has no effect.
if "SSH_CONNECTION" in os.environ:
    path = '/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn'
else:
    path = '/Users/dylandissanayake/Desktop/DPhil/Comp Disc/Repositories/TB-PNCA-GNN'

# Test for and append the *same* normalised value, so re-running this cell can
# never add a duplicate sys.path entry.
repo_root = os.path.abspath(path)
if repo_root not in sys.path:
    sys.path.append(repo_root)

import datetime
import random

import numpy as np
import pandas as pd

import torch
from torch_geometric.data import Data

import wandb

import warnings
warnings.filterwarnings('ignore')

from src import run_model, protein_graph, gcn_model, evaluation

%load_ext autoreload
%autoreload 2

%aimport src

torch.cuda.is_available()

'Running via SSH'



True

In [2]:
# Load the saved dataset object from disk (path is relative to the notebook's
# working directory). NOTE(review): torch.load unpickles the file, so only load
# files from a trusted source.
full_dataset = torch.load('datasets/full_dataset_v2.pth')

### Set Up Params and Sweep Config

In [3]:
# Seed every random number generator this notebook relies on. torch has its
# own generator, independent of numpy's and the stdlib's, so it is seeded
# explicitly as well.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

# logging params (only used for wandb metrics)
n_samples = 10000
# cutoff_distance = 6.3   # now sampled by the sweep

# gcn params held fixed for every sweep run
num_node_features = 12
batch_size = 256
# The three values below are sampled by the sweep instead of being fixed here:
# hidden_channels = 64
# learning_rate = 0.001
# wd = 5e-5
epochs = 1500

# Reference amino-acid sequence (presumably wild-type PncA, going by the
# notebook title and the variable name).
wt_seq = 'MRALIIVDVQNDFCEGGSLAVTGGAALARAISDYLAEAADYHHVVATKDFHIDPGDHFSGTPDYSSSWPPHCVSGTPGADFHPSLDTSAIEAVFYKGAYTGAYSGFEGVDENGTPLLNWLRQRGVDEVDVVGIATDHCVRQTAEDAVRNGLATRVLVDLTAGVSADTTVAALEEMRTASVELVCS'

In [4]:
# Build the W&B sweep configuration.
#
# The search space is written out as the history of sweeps that were run: the
# first sweep defines every parameter, and each later sweep overrides only the
# ranges that were changed. The overrides are applied in order, so the
# configuration in effect is the result of the last one.


def _choices(*options):
    """Categorical parameter: one of the listed values."""
    return {'values': list(options)}


def _log_uniform(low, high):
    """Parameter sampled log-uniformly between `low` and `high`."""
    return {'distribution': 'log_uniform_values', 'min': low, 'max': high}


def _uniform(low, high):
    """Parameter sampled uniformly between `low` and `high`."""
    return {'distribution': 'uniform', 'min': low, 'max': high}


# Random search, optimising the logged 'Test Accuracy' metric.
metric = {'name': 'Test Accuracy', 'goal': 'maximize'}
sweep_config = {'method': 'random', 'metric': metric}

# --- First sweep -------------------------------------------------------------
parameters_dict = {
    'hidden_channels': _choices(32, 64, 128, 256),
    'weight_decay': _log_uniform(1e-8, 1e-2),
    'dropout': _choices(0.2, 0.4, 0.5, 0.6, 0.8),
    'cutoff_distance': _uniform(3.5, 10),
    'learning_rate': _log_uniform(1e-8, 1e-1),
}

# sweep_config holds a reference to this dict, so the in-place updates below
# are reflected in sweep_config['parameters'].
sweep_config['parameters'] = parameters_dict

# --- Second / third sweep ----------------------------------------------------
parameters_dict.update(
    hidden_channels=_choices(64, 128, 192, 256, 320, 384),
    weight_decay=_log_uniform(5e-7, 1e-2),
    dropout=_choices(0.4, 0.5, 0.6, 0.8),
    learning_rate=_log_uniform(1e-5, 1e-2),
)

# --- Fourth / fifth sweep ----------------------------------------------------
parameters_dict.update(
    hidden_channels=_choices(128, 192, 256, 320),
    weight_decay=_log_uniform(1e-5, 1e-3),
    dropout=_choices(0.4, 0.5, 0.6, 0.8),
    learning_rate=_log_uniform(5e-5, 5e-2),
)

# --- Sixth sweep -------------------------------------------------------------
parameters_dict.update(learning_rate=_log_uniform(1e-6, 5e-3))

# --- Seventh sweep -----------------------------------------------------------
parameters_dict.update(learning_rate=_log_uniform(5e-6, 5e-4))

# --- Eighth sweep, with the cutoff distance range changed as well ------------
parameters_dict.update(
    learning_rate=_log_uniform(5e-6, 1e-3),
    cutoff_distance=_uniform(5.5, 14),
)



In [5]:
# Print the sweep configuration as it stands after all parameter updates, to
# confirm which search space is actually in effect.
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Test Accuracy'},
 'parameters': {'cutoff_distance': {'distribution': 'uniform',
                                    'max': 14,
                                    'min': 5.5},
                'dropout': {'values': [0.4, 0.5, 0.6, 0.8]},
                'hidden_channels': {'values': [128, 192, 256, 320]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.001,
                                  'min': 5e-06},
                'weight_decay': {'distribution': 'log_uniform_values',
                                 'max': 0.001,
                                 'min': 1e-05}}}


### Define Training Loop

In [6]:
# W&B project name; passed to wandb.agent and used as a directory level in the
# path where each run's model is saved.
project = "pnca-sweep-2"

In [8]:
# Uncomment to register a brand-new sweep from `sweep_config`. It is left
# commented out, presumably so that re-running the notebook attaches to an
# existing sweep rather than creating another one.
# sweep_id = wandb.sweep(sweep_config, project=project)

# IDs of earlier sweeps, kept for reference.
# project = pnca-sweep-1
# sweep_id = '18ili8gf'
# sweep_id = 'z95xpbwd'
# sweep_id = 'tr8mwg42'
# sweep_id = '2hsfpqs6'
# sweep_id = 'gt2h3xwl'
# sweep_id = 'y0a18l3k'
# sweep_id = 'rl3tm5la'
# sweep_id = 'f2b4l8u4'

# ---
# project = pnca-sweep-2
# Sweep that the agent in this notebook attaches to.
sweep_id = 'u3q627wd'


In [9]:
def sweep_run():
    """Execute a single W&B sweep trial and save the resulting model.

    Opens a W&B run, reads the hyperparameters assigned to it from
    ``run.config`` and passes them, together with the notebook-level settings
    (``batch_size``, ``num_node_features``, ``epochs``, ``full_dataset``), to
    ``run_model.pnca_simpleGCN``. The object returned by that call is written
    with ``torch.save`` to
    ``saved_models/carter_ds_aug/<project>/<sweep_id>/<run name>``.

    Takes no arguments and returns nothing; it is meant to be handed to
    ``wandb.agent`` as the trial function.
    """
    with wandb.init() as run:
        hparams = run.config

        # Values sampled per trial by the sweep.
        swept_kwargs = dict(
            cutoff_distance=hparams.cutoff_distance,
            hidden_channels=hparams.hidden_channels,
            learning_rate=hparams.learning_rate,
            wd=hparams.weight_decay,
            dropout=hparams.dropout,
        )

        # Settings that are identical for every trial.
        fixed_kwargs = dict(
            sequences=None,  # alternative tried earlier: sequences_dict
            dataset=full_dataset,
            self_loops=False,
            edge_weight_func='1-(dist/cutoff)',  # alternative tried earlier: 'none'
            normalise_ews=True,
            batch_size=batch_size,
            num_node_features=num_node_features,
            epochs=epochs,
            lr_scheduling=False,
            # NOTE(review): presumably 'sweep' tells run_model that the W&B run
            # is already open, so it should not start its own - confirm in src.
            wandb_params={
                'use_wandb': False,
                'sweep': True
            },
        )

        model = run_model.pnca_simpleGCN(**fixed_kwargs, **swept_kwargs)

        # One directory per sweep, one file per run (named after the W&B run).
        save_dir = f'saved_models/carter_ds_aug/{project}/{sweep_id}'
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model, f'{save_dir}/{run.name}')

In [10]:
# Start a sweep agent in this kernel: it pulls hyperparameter sets for
# `sweep_id` from W&B and calls `sweep_run` for each, stopping after 10 runs.
wandb.agent(
    sweep_id,
    sweep_run,
    project=project,
    count=10,
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Agent Starting Run: 4hviwzah with config:
[34m[1mwandb[0m: 	cutoff_distance: 10.9601678879428
[34m[1mwandb[0m: 	dropout: 0.6
[34m[1mwandb[0m: 	hidden_channels: 320
[34m[1mwandb[0m: 	learning_rate: 0.000248185169779706
[34m[1mwandb[0m: 	weight_decay: 0.00016897574433757828
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdylan-home[0m. Use [1m`wandb login --relogin`[0m to force relogin


Adjusting edge index and attaching edge weights for cutoff distance 10.9601678879428
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.4950, Test Acc: 0.5553, Train Loss: 1.7187, Test Loss: 1.5172
Epoch: 020, Train Acc: 0.7511, Test Acc: 0.6977, Train Loss: 0.4961, Test Loss: 0.5937
Epoch: 030, Train Acc: 0.5346, Test Acc: 0.5130, Train Loss: 0.8438, Test Loss: 1.0467
Epoch: 040, Train Acc: 0.8273, Test Acc: 0.7437, Train Loss: 0.3828, Test Loss: 0.5536
20 epochs passed without 0 test loss improvement. 
Early stopping triggered.


0,1
Test Accuracy,▃▁▅▆▅▇▆▇▁▂▄▁▁▁▆▇▁▄▇▆█▇▁▇▁▁▇▇█▁▁▆▂▁▁▁▇▇█▁
Test F1,▃▇▅▇▅▇▆█▇▁▄▇▇▇▆▇▇▄▇██▇▇█▇▇███▇▇▆▇▇▇▇█▆█▇
Test Loss,▁▂▁▁▁▁▁▁▂▃▂▄▄▂▁▁▆▂▁▁▁▁▃▁▃▄▁▁▁▂▂▂▂█▇▆▁▁▁▃
Test Sensitivity,▂█▄▅▃▅▄▆█▁▂███▄▅█▂▄▇▆▅█▇██▇▇▇██▃████▇▄▆█
Test Specificity,█▁▇▆█▇▇▆▁██▁▁▁█▇▁██▄▇▇▁▅▁▁▅▅▆▁▁█▁▁▁▁▆█▇▁
Train Accuracy,▂▂▄▆▄▆▅▆▂▁▃▂▂▂▅▇▂▃▆▆█▇▂▇▂▂▇▇█▂▂▅▂▂▂▂█▆█▂
Train F1,▂▇▅▇▅▇▆▇▇▁▃▇▇▇▅▇▇▃▆██▇▇█▇▇███▇▇▆▇▇▇▇█▆█▇
Train Loss,▂▂▂▁▂▁▁▁▂▄▂▄▄▂▂▁▆▃▁▁▁▁▃▁▃▄▁▁▁▂▂▂▂█▇▆▁▁▁▃
Train Sensitivity,▁█▃▅▃▅▄▆█▁▂███▄▅█▂▄█▆▅█▇██▇▇▇██▄████▇▄▆█
Train Specificity,█▁█▇█▇█▆▁██▁▁▁██▁██▅▇█▁▆▁▁▆▆▇▁▁█▁▁▁▁▇██▁

0,1
Test Accuracy,0.51267
Test F1,0.67783
Test Loss,1.55351
Test Sensitivity,1.0
Test Specificity,0.0
Train Accuracy,0.53457
Train F1,0.6967
Train Loss,1.26823
Train Sensitivity,1.0
Train Specificity,0.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: m5jqbzjr with config:
[34m[1mwandb[0m: 	cutoff_distance: 10.558035357826068
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	hidden_channels: 192
[34m[1mwandb[0m: 	learning_rate: 0.0007457918765369257
[34m[1mwandb[0m: 	weight_decay: 1.4151947202350656e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 10.558035357826068
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.4699, Test Acc: 0.4973, Train Loss: 3.2531, Test Loss: 2.9842
Epoch: 020, Train Acc: 0.5346, Test Acc: 0.5127, Train Loss: 6.8117, Test Loss: 7.2829
Epoch: 030, Train Acc: 0.7760, Test Acc: 0.7600, Train Loss: 0.4939, Test Loss: 0.5459
20 epochs passed without 0 test loss improvement. 
Early stopping triggered.


0,1
Test Accuracy,▄▃▁▆▁▃▃▂▂▁▅▇█▂██▂▂▇▂▆▂▁▃▆▂▇▇▂▇▂▂▂▄▂
Test F1,▅▃▁▆▇▄▃▇▇▁▅██▇██▇▇▇▇▆▇▁▇▆▃██▇█▇▇▇▅▇
Test Loss,▁▁▁▁▁▁▁▁▃▂▁▁▁▂▁▁▃▃▁▄▁▂█▁▁▂▁▁▃▁▁▂▂▂▁
Test Sensitivity,▃▂▁▄█▂▂██▁▃▆▆█▆▅██▅█▄█▁█▄▂▇▅█▅███▃█
Test Specificity,███▇▁██▁▁██▆▆▁▇▇▁▁█▁█▁█▂██▅▇▁▇▁▁▁█▁
Train Accuracy,▃▂▁▅▂▂▂▂▂▁▄██▂█▇▂▂▇▂▅▂▁▃▅▂█▇▂▇▂▂▂▃▂
Train F1,▃▂▁▆▇▃▂▇▇▁▄██▇█▇▇▇▇▇▆▇▁▇▅▂█▇▇▇▇▇▇▄▇
Train Loss,▁▁▁▁▁▁▁▁▃▂▁▁▁▂▁▁▃▃▁▄▁▂█▁▁▂▁▁▃▁▁▂▂▂▁
Train Sensitivity,▂▁▁▄█▂▂██▁▃▆▆█▆▅██▅█▄█▁█▄▁▇▅█▅███▃█
Train Specificity,████▁██▁▁██▇▇▁██▁▁█▁█▁█▂██▆█▁█▁▁▁█▁

0,1
Test Accuracy,0.51267
Test F1,0.67783
Test Loss,1.59111
Test Sensitivity,1.0
Test Specificity,0.0
Train Accuracy,0.53471
Train F1,0.69677
Train Loss,1.23435
Train Sensitivity,1.0
Train Specificity,0.00031


[34m[1mwandb[0m: Agent Starting Run: lzk44r8n with config:
[34m[1mwandb[0m: 	cutoff_distance: 8.243325306461895
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	hidden_channels: 192
[34m[1mwandb[0m: 	learning_rate: 1.182462073730952e-05
[34m[1mwandb[0m: 	weight_decay: 1.508436395358384e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 8.243325306461895
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.5306, Test Acc: 0.5100, Train Loss: 0.6820, Test Loss: 0.6836
Epoch: 020, Train Acc: 0.6401, Test Acc: 0.5997, Train Loss: 0.6740, Test Loss: 0.6712
Epoch: 030, Train Acc: 0.5724, Test Acc: 0.5497, Train Loss: 0.6636, Test Loss: 0.6611
Epoch: 040, Train Acc: 0.5331, Test Acc: 0.4970, Train Loss: 0.6579, Test Loss: 0.6614
Epoch: 050, Train Acc: 0.6790, Test Acc: 0.6423, Train Loss: 0.6417, Test Loss: 0.6383
Epoch: 060, Train Acc: 0.6963, Test Acc: 0.6537, Train Loss: 0.6296, Test Loss: 0.6268
Epoch: 070, Train Acc: 0.6837, Test Acc: 0.6490, Train Loss: 0.6160, Test Loss: 0.6170
Epoch: 080, Train Acc: 0.5751, Test Acc: 0.5497, Train Loss: 0.6137, Test Loss: 0.6272
Epoch: 090, Train Acc: 0.7154, Test Acc: 0.6967, Train Loss: 0.5912, Test Loss: 0.5895
Epoch: 100, Train Acc: 0.7293, Test Acc: 0.6977, Train Loss: 0.5737,

0,1
Test Accuracy,▁▁▄▁▁▅▄▅▆▆▄▆▆▆▄▆▇▇▇▇▇▇▇▇█████▇█▇█████▇▇█
Test F1,▂▃▁▁▂▂▂▃▃▂▃▄▄▄▄▅▅▂▄▅▆▄▄▇▆▇▇▇▆▇▆█▇███████
Test Loss,██▇▇▇▆▆▆▅▅▅▅▄▄▅▄▃▄▃▂▂▃▂▃▂▂▁▁▁▂▁▂▁▁▁▁▁▃▃▁
Test Sensitivity,██▄▇▇▃▄▄▃▃▅▄▃▃▆▅▄▁▂▃▃▂▂▆▃▅▄▄▃▆▃▅▄▄▄▄▅▆▆▅
Test Specificity,▁▁▅▁▁▆▅▅▆▇▄▆▆▇▄▅▇██▇▇██▅▇▇▇▇█▆█▆▇▇▇▇▇▆▆▆
Train Accuracy,▁▁▃▁▁▅▄▅▅▅▄▅▅▆▄▅▆▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████▇██
Train F1,▃▃▂▂▃▂▃▃▃▃▄▄▄▄▅▅▅▁▄▅▅▃▄▆▅▆▆▆▅▇▅▇▆▇▇▇████
Train Loss,████▇▇▇▇▆▆▆▅▅▅▅▅▄▅▄▄▃▄▄▃▃▃▂▂▃▂▃▂▂▂▁▁▁▂▂▁
Train Sensitivity,██▄▇█▃▄▄▃▃▅▄▄▃▆▆▄▁▃▃▄▂▂▆▃▅▄▄▃▆▃▆▄▅▅▅▅▆▆▅
Train Specificity,▁▁▅▁▁▇▅▆▇▇▅▇▇▇▄▆▇███▇██▆█▇▇██▆█▇████▇▆▇▇

0,1
Test Accuracy,0.76433
Test F1,0.77448
Test Loss,0.47392
Test Sensitivity,0.78934
Test Specificity,0.73803
Train Accuracy,0.84943
Train F1,0.85088
Train Loss,0.36056
Train Sensitivity,0.80358
Train Specificity,0.90209


[34m[1mwandb[0m: Agent Starting Run: 4o2scfi4 with config:
[34m[1mwandb[0m: 	cutoff_distance: 13.83453154962563
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 9.512638436644785e-06
[34m[1mwandb[0m: 	weight_decay: 2.9916536478952607e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 13.83453154962563
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.5406, Test Acc: 0.5410, Train Loss: 0.6423, Test Loss: 0.6397
Epoch: 020, Train Acc: 0.7020, Test Acc: 0.7080, Train Loss: 0.6025, Test Loss: 0.5762
Epoch: 030, Train Acc: 0.7239, Test Acc: 0.6790, Train Loss: 0.5497, Test Loss: 0.5670
Epoch: 040, Train Acc: 0.7507, Test Acc: 0.7140, Train Loss: 0.5160, Test Loss: 0.5418
Epoch: 050, Train Acc: 0.7653, Test Acc: 0.7633, Train Loss: 0.4861, Test Loss: 0.4844
