# Run PncA WandB Sweep

In [1]:
from IPython.display import display
import os

# Tell the reader which environment the kernel is running in; the presence of
# SSH_CONNECTION in the environment is used as the marker for a remote session.
display("Running via SSH" if "SSH_CONNECTION" in os.environ else "Running locally")
    
import sys
import os

# Location of the repository checkout (the directory that holds the `src` package),
# chosen by whether the kernel runs in an SSH session or on the local machine.
# The original wrapped both paths in os.path.join('..', <absolute path>); joining onto an
# absolute path discards the '..', so the paths are written out directly here.
# NOTE(review): both locations are machine-specific absolute paths.
if "SSH_CONNECTION" in os.environ:
    path = '/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn'
else:
    path = '/Users/dylandissanayake/Desktop/DPhil/Comp Disc/Repositories/TB-PNCA-GNN'

# Test for, and append, the same normalised value so that re-running the cell
# can never add a duplicate entry to sys.path.
_repo_root = os.path.abspath(path)
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

import datetime
import random

import numpy as np
import pandas as pd
import pickle as pkl

import torch
from torch_geometric.data import Data

import wandb

import warnings
warnings.filterwarnings('ignore')

from src import run_model, protein_graph, gcn_model, evaluation

%load_ext autoreload
%autoreload 2

%aimport src

# Last expression of the cell, so its value is displayed: whether PyTorch can see a CUDA device.
torch.cuda.is_available()

'Running via SSH'



True

In [2]:
# Pickled graph dictionary to train on. Other variants that have been used with this
# notebook (swap the path below to switch dataset):
#   'datasets/singletons_af_graph_dict.pkl'
#   'datasets/singletons_af_no_mut_feats_graph_dict.pkl'
graph_dict_file = 'datasets/singletons_af_w_pza_graph_dict.pkl'

# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open(graph_dict_file, 'rb') as pkl_file:
    graph_dict = pkl.load(pkl_file)

In [3]:
# Combined number of entries in the train and test splits.
sum(len(graph_dict[split]) for split in ('train', 'test'))

664

### Set Up Params and Sweep Config

In [4]:
# Seed every random number generator the notebook draws from.
seed = 42
np.random.seed(seed)
random.seed(seed)
# PyTorch keeps its own generator state, independent of numpy and `random`, so it
# must be seeded explicitly as well.
# NOTE: this improves repeatability but does not by itself guarantee bit-identical
# results on CUDA.
torch.manual_seed(seed)

# --- logging params (only used for wandb metrics) ---
# Total number of graphs over both splits.
n_samples = sum(len(graph_dict[split]) for split in ('train', 'test'))
# cutoff_distance = 6.3   (previous fixed value, kept for reference)

# --- gcn params held constant for every run ---
# Alternative value kept from the original cell: num_node_features = 12
# (presumably for the dataset without mutation features - TODO confirm).
num_node_features = 16
batch_size = 256
epochs = 1500
# Previous fixed values, kept for reference:
#   hidden_channels = 64
#   learning_rate = 0.001
#   wd = 5e-5

# Reference amino-acid sequence (presumably wild-type PncA - TODO confirm).
wt_seq = 'MRALIIVDVQNDFCEGGSLAVTGGAALARAISDYLAEAADYHHVVATKDFHIDPGDHFSGTPDYSSSWPPHCVSGTPGADFHPSLDTSAIEAVFYKGAYTGAYSGFEGVDENGTPLLNWLRQRGVDEVDVVGIATDHCVRQTAEDAVRNGLATRVLVDLTAGVSADTTVAALEEMRTASVELVCS'

In [24]:
# The search space was refined over three sweeps. Each later sweep overrides or
# extends the previous one, so the configuration built here is the cumulative
# (third-sweep) search space.

# First sweep: the initial, widest search space.
first_sweep_space = dict(
    hidden_channels={'values': [64, 128, 192, 256, 320, 384]},
    weight_decay={'distribution': 'log_uniform_values', 'min': 1e-8, 'max': 1e-2},
    dropout={'values': [0.2, 0.4, 0.5, 0.6, 0.8]},
    cutoff_distance={'distribution': 'uniform', 'min': 4, 'max': 12},
    learning_rate={'distribution': 'log_uniform_values', 'min': 1e-8, 'max': 1e-1},
)

# Second sweep: narrower learning-rate range, smallest hidden size dropped.
second_sweep_overrides = dict(
    learning_rate={'distribution': 'log_uniform_values', 'min': 1e-5, 'max': 1e-2},
    hidden_channels={'values': [128, 192, 256, 320, 384]},
)

# Third sweep: learning-rate range narrowed again, edge-weight function added
# as a searched parameter.
third_sweep_overrides = dict(
    learning_rate={'distribution': 'log_uniform_values', 'min': 1e-5, 'max': 1e-3},
    edge_weights={'values': ["dist", "1-(dist/cutoff)", "1/dist"]},
)

# Later sweeps win on key collisions; new keys are appended at the end.
parameters_dict = {
    **first_sweep_space,
    **second_sweep_overrides,
    **third_sweep_overrides,
}

# Quantity the sweep tries to optimise.
metric = dict(name='Test Accuracy', goal='maximize')

# Full sweep definition: random search over `parameters_dict`, scored by `metric`.
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}


In [25]:
import pprint
# Print the sweep configuration as it stands after all of the overrides above,
# i.e. exactly what is handed to wandb.sweep.
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Test Accuracy'},
 'parameters': {'cutoff_distance': {'distribution': 'uniform',
                                    'max': 12,
                                    'min': 4},
                'dropout': {'values': [0.2, 0.4, 0.5, 0.6, 0.8]},
                'edge_weights': {'values': ['dist',
                                            '1-(dist/cutoff)',
                                            '1/dist']},
                'hidden_channels': {'values': [128, 192, 256, 320, 384]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.001,
                                  'min': 1e-05},
                'weight_decay': {'distribution': 'log_uniform_values',
                                 'max': 0.01,
                                 'min': 1e-08}}}


### Define Training Loop

In [21]:
# wandb project name passed to both wandb.sweep and wandb.agent below.
project = "pnca-af-singletons-sweep-w-mutation-feats"
# Alternative project name kept from an earlier configuration:
# project = "pnca-af-singletons-sweep-NO-mutation-feats"

In [26]:
# Register the sweep configuration with wandb and keep the returned ID for the agent.
# NOTE: every execution of this line creates a new sweep; to continue an existing
# sweep, assign one of the IDs listed below instead of calling wandb.sweep again.
sweep_id = wandb.sweep(sweep_config, project=project)
## project 1 - IDs of earlier sweeps
# sweep_id = 'x0k5kbpt'
# sweep_id = 'mnkuowkk'
# sweep_id = 'jp3xzyp5'
#! sweeps from here include proper pza distance
# sweep_id = '9b3bkt5y'
# sweep_id = 'w1bgy5pd'
# sweep_id = 'r7865la3'

# sweep_id = wandb.sweep(sweep_config, project=project)
# sweep_id = 'fypk3i0y'

Create sweep with ID: r7865la3
Sweep URL: https://wandb.ai/dylan-home/pnca-af-singletons-sweep-w-mutation-feats/sweeps/r7865la3


In [9]:
# # Run a single run

# model = run_model.pnca_GCN_vary_graph(
#             self_loops = False,
#             cutoff_distance = 4,
#             edge_weight_func = '1-(dist/cutoff)',
#             batch_size = batch_size,
#             num_node_features = num_node_features,
#             hidden_channels = 64,
#             learning_rate = 1e-5,
#             wd = 1e-5,
#             dropout = 0.5,
#             lr_scheduling=False,
#             epochs = 50,
#             graph_dict= graph_dict,
#             normalise_ews=True,
#             # wandb_params={
#             #     'use_wandb': False,
#             #     'sweep': True
#             # }
#         )

In [27]:
def sweep_run():
    """Execute a single sweep trial.

    Takes no arguments so that it can be handed to ``wandb.agent`` as the run
    function. Opens a wandb run, reads the hyperparameters assigned to it from
    ``run.config`` and passes them, together with the notebook-level settings
    (``batch_size``, ``num_node_features``, ``epochs``, ``graph_dict``), to
    ``run_model.pnca_GCN_vary_graph``.

    Returns:
        None. The object returned by ``pnca_GCN_vary_graph`` is currently only
        bound to a local name (see the commented-out saving code at the end).
    """
    with wandb.init() as run:
        sweep_params = run.config

        # Settings shared by every trial in the sweep.
        fixed_kwargs = dict(
            self_loops=False,
            batch_size=batch_size,
            num_node_features=num_node_features,
            lr_scheduling=False,
            epochs=epochs,
            graph_dict=graph_dict,
            normalise_ews=True,
            wandb_params={
                'use_wandb': False,
                'sweep': True,
            },
        )

        # Hyperparameters chosen by the sweep for this particular trial.
        # (edge_weight_func was previously fixed to '1-(dist/cutoff)'.)
        sampled_kwargs = dict(
            cutoff_distance=sweep_params.cutoff_distance,
            edge_weight_func=sweep_params.edge_weights,
            hidden_channels=sweep_params.hidden_channels,
            learning_rate=sweep_params.learning_rate,
            wd=sweep_params.weight_decay,
            dropout=sweep_params.dropout,
        )

        model = run_model.pnca_GCN_vary_graph(**fixed_kwargs, **sampled_kwargs)

        # Optional: persist the result of each trial (disabled).
        # os.makedirs(f'saved_models/carter_ds_aug/{project}/{sweep_id}', exist_ok=True)
        # torch.save(model, f'saved_models/carter_ds_aug/{project}/{sweep_id}/{run.name}')

In [28]:
# Start a wandb agent for the sweep; it calls sweep_run once per trial, and
# count=200 caps the number of trials this agent will execute.
wandb.agent(sweep_id, sweep_run, project = project, count=200)

[34m[1mwandb[0m: Agent Starting Run: 0v5q5tco with config:
[34m[1mwandb[0m: 	cutoff_distance: 6.057335565107498
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	edge_weights: 1-(dist/cutoff)
[34m[1mwandb[0m: 	hidden_channels: 320
[34m[1mwandb[0m: 	learning_rate: 7.252955287168342e-05
[34m[1mwandb[0m: 	weight_decay: 9.478203831121294e-07
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 6.057335565107498
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.5280, Test Acc: 0.5000, Train Loss: 0.6886, Test Loss: 0.6896
Epoch: 020, Train Acc: 0.5797, Test Acc: 0.5800, Train Loss: 0.6825, Test Loss: 0.6830
Epoch: 030, Train Acc: 0.5323, Test Acc: 0.5150, Train Loss: 0.6729, Test Loss: 0.6811
Epoch: 040, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6679, Test Loss: 0.6815
Epoch: 050, Train Acc: 0.5517, Test Acc: 0.5200, Train Loss: 0.6479, Test Loss: 0.6595
Epoch: 060, Train Acc: 0.5948, Test Acc: 0.5250, Train Loss: 0.6310, Test Loss: 0.6456
Epoch: 070, Train Acc: 0.7026, Test Acc: 0.6550, Train Loss: 0.6018, Test Loss: 0.6209
Epoch: 080, Train Acc: 0.6056, Test Acc: 0.5950, Train Loss: 0.6050, Test Loss: 0.6309
Epoch: 090, Train Acc: 0.7198, Test Acc: 0.6600, Train Loss: 0.5640, Test Loss: 0.5831
Epoch: 100, Train Acc: 0.7543, Test Acc: 0.7050, Train Loss: 0.5433,

0,1
Test Accuracy,▂▁▃▃▂▂▂▂▂▂▇▆▄▆▇▇▆▇▆▇▆▇▆▇▆▇█▇▇▆▇▇████▇█▇▇
Test F1,▇▁▆▆▇▇▇▇▇▇▇▇█▇██▇▇▇▇▆▇▇▇▇▇█▇▇▇▇▇████▇█▇▇
Test Loss,████▇▇▇▇▇▆▅▄▅▃▃▃▃▂▃▂▃▂▃▂▃▂▁▂▄▄▂▂▁▁▁▁▇▁▂▂
Test Sensitivity,█▁▅▄▇█████▅▅█▄▆▅▅▅▄▅▄▅▄▅▄▅▆▅▄▄▅▅▅▆▅▅▄▅▅▅
Test Specificity,▁█▅▆▂▁▁▂▁▂▆▆▂▇▆▇▇▇▇▇▇▇▇▇▇▇▆▇▇▇▇▇▇▆▇▇▇▇▇▇
Train Accuracy,▂▁▃▃▃▂▂▄▂▄▆▆▄▆▇▇▇▇▆▇▆▇▇▇▇▇█▇▇▆█▇████▆███
Train F1,▇▁▆▆▇▇▇▇▇▇▇▇▇▇██▇█▇█▇█▇█▇███▇▇██████▇███
Train Loss,████▇▇▇▇▇▆▆▅▆▅▄▄▄▄▄▃▄▃▃▃▄▃▂▂▄▄▂▂▁▁▁▁▅▁▂▁
Train Sensitivity,█▁▅▅▇█████▅▆█▅▆▆▅▆▅▆▅▆▅▆▅▆▆▆▅▅▆▅▆▆▆▆▄▆▆▆
Train Specificity,▁█▅▆▃▁▁▂▁▂▇▆▂▇▆▇▇▇▇▇▇▇▇▇█▇▇▇██▇█▇▇▇▇█▇██

0,1
Test Accuracy,0.72
Test F1,0.67442
Test Loss,0.54855
Test Sensitivity,0.56311
Test Specificity,0.8866
Train Accuracy,0.8125
Train F1,0.79433
Train Loss,0.4433
Train Sensitivity,0.68293
Train Specificity,0.95872


[34m[1mwandb[0m: Agent Starting Run: yky9d62w with config:
[34m[1mwandb[0m: 	cutoff_distance: 4.418753950502125
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	edge_weights: dist
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 7.161810216811372e-05
[34m[1mwandb[0m: 	weight_decay: 5.466810046631845e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 4.418753950502125
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.4720, Test Acc: 0.4850, Train Loss: 0.6935, Test Loss: 0.6931
Epoch: 020, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6902, Test Loss: 0.6910
Epoch: 030, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6880, Test Loss: 0.6902
Epoch: 040, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6858, Test Loss: 0.6891
Epoch: 050, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6851, Test Loss: 0.6879
Epoch: 060, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6830, Test Loss: 0.6867
Epoch: 070, Train Acc: 0.5302, Test Acc: 0.5150, Train Loss: 0.6797, Test Loss: 0.6843
Epoch: 080, Train Acc: 0.5345, Test Acc: 0.5200, Train Loss: 0.6759, Test Loss: 0.6814
Epoch: 090, Train Acc: 0.5927, Test Acc: 0.5150, Train Loss: 0.6724, Test Loss: 0.6778
Epoch: 100, Train Acc: 0.6207, Test Acc: 0.5450, Train Loss: 0.6664,

0,1
Test Accuracy,▂▁▁▁▁▁▁▁▁▅▅▃▃▅▄▃▄▄▆▆▆▆▆▆▆▇▆▆▆▇▇▇▇███▇███
Test F1,▅▆▆▆▆▆▆▆▆▆▆▁▁▄▃▁▃▃▅▅▆▅▅▅▅▆▄▄▅▆▆▆▆█▇█▇▇▇█
Test Loss,██████▇▇▇▇▆▆▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▂▂▁▂▂▂▁▂▂▁▁▁
Test Sensitivity,▆██████▇█▅▅▁▁▃▂▁▂▂▃▄▄▄▃▃▃▃▂▂▃▃▄▃▃▅▅▆▅▄▅▅
Test Specificity,▃▁▁▁▁▁▁▂▁▅▆▇▇▆▇▇▇▇▇▇▆▇▇▇█▇████▇██▇▇▆▆█▇▆
Train Accuracy,▁▁▁▁▁▁▁▂▂▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇██▇█▇██
Train F1,▁▃▃▃▃▃▃▄▃▃▄▁▂▃▃▂▃▃▄▅▅▅▅▅▅▅▅▅▅▆▆▆▆█▇▇█▇██
Train Loss,███████▇▇▇▇▇▆▆▆▅▅▅▅▄▄▄▄▄▄▃▄▃▃▃▂▂▂▂▂▂▁▂▁▁
Train Sensitivity,▅████████▅▄▁▂▃▂▂▂▂▃▄▄▄▃▄▃▃▃▃▃▃▄▄▄▅▅▆▆▄▅▆
Train Specificity,▃▁▁▁▁▁▁▂▁▅▆█▇▆▇█▇▇▇▇▇▇▇▇█████████▇█▆▇██▇

0,1
Test Accuracy,0.72
Test F1,0.70526
Test Loss,0.55004
Test Sensitivity,0.65049
Test Specificity,0.79381
Train Accuracy,0.82759
Train F1,0.82063
Train Loss,0.40315
Train Sensitivity,0.7439
Train Specificity,0.92202


[34m[1mwandb[0m: Agent Starting Run: wonzuu5w with config:
[34m[1mwandb[0m: 	cutoff_distance: 7.484564821190283
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	edge_weights: dist
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 1.5576715769846176e-05
[34m[1mwandb[0m: 	weight_decay: 4.112169248817613e-06
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 7.484564821190283
