# Run PncA WandB Sweep

In [1]:
from IPython.display import display
import os

# Show which environment the kernel runs in: the SSH_CONNECTION variable is
# present in the environment when the session was started over SSH.
display("Running via SSH" if "SSH_CONNECTION" in os.environ else "Running locally")
    
import sys
import os

# Make the project repository importable (needed for `from src import ...`).
# The repository root differs between the remote (SSH) host and the local
# machine. os.path.join() discards every component that precedes an absolute
# one, so the former '..' prefix never had any effect and has been dropped.
if "SSH_CONNECTION" in os.environ:
    path = '/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn'
else:
    path = '/Users/dylandissanayake/Desktop/DPhil/Comp Disc/Repositories/TB-PNCA-GNN'

# Normalise first so the membership test and the appended entry are the same
# string; otherwise re-running the cell could add duplicate sys.path entries.
path = os.path.abspath(path)
if path not in sys.path:
    sys.path.append(path)

import datetime
import random

import numpy as np
import pandas as pd
import copy
import pickle

import torch
from torch_geometric.data import Data

import wandb

import sbmlcore
import sbmlsim
import gumpy

import warnings
warnings.filterwarnings('ignore')

from src import run_model, protein_graph, gcn_model, evaluation

# Load IPython's autoreload extension and select mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Explicitly register the project package `src` with autoreload.
%aimport src

# Last expression of the cell: displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

'Running via SSH'

  from .autonotebook import tqdm as notebook_tqdm


True

In [2]:
# Load the pre-built dataset; the relative path is resolved against the
# kernel's current working directory. The object is later passed unchanged as
# `dataset=` to run_model.pnca_simpleGCN inside sweep_run.
# NOTE(review): torch.load relies on pickle (unless restricted with
# weights_only), so only load files from a trusted source.
full_dataset = torch.load('full_orig_dataset.pth')

### Set Up Params and Sweep Config

In [3]:
# Seed every random number generator the notebook relies on. PyTorch was
# previously left unseeded, so weight initialisation, dropout and data
# shuffling differed between otherwise identical executions.
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

# logging params (only used for wandb metrics)
n_samples = 10000
cutoff_distance = 6.3

# gcn params
# sweep_run reads batch_size, num_node_features and epochs from here; it takes
# hidden_channels, learning_rate, weight decay and cutoff_distance from the
# sweep's run.config instead, so those values below are not used by the sweep.
ews = 'none'
num_node_features = 12
batch_size = 256
hidden_channels = 64
learning_rate = 0.001
wd = 5e-5
epochs = 500

# Presumably the wild-type PncA amino-acid sequence; it is not referenced by
# any other cell visible in this notebook -- TODO confirm it is still needed.
wt_seq = 'MRALIIVDVQNDFCEGGSLAVTGGAALARAISDYLAEAADYHHVVATKDFHIDPGDHFSGTPDYSSSWPPHCVSGTPGADFHPSLDTSAIEAVFYKGAYTGAYSGFEGVDENGTPLLNWLRQRGVDEVDVVGIATDHCVRQTAEDAVRNGLATRVLVDLTAGVSADTTVAALEEMRTASVELVCS'

In [4]:
# Search space sampled for every trial. Log-uniform ranges are used for the
# quantities that span several orders of magnitude.
# Not swept at the moment (previously sketched as options): the optimizer
# ('adam' / 'sgd'), a fixed 'epochs' value of 400, and 'batch_size' drawn from
# 'q_log_uniform_values' (q=8, between 32 and 256).
parameters_dict = {
    'hidden_channels': {'values': [32, 64, 128, 256]},
    'weight_decay': {
        'distribution': 'log_uniform_values',
        'min': 1e-8,
        'max': 1e-2,
    },
    'dropout': {'values': [0.2, 0.4, 0.5, 0.6, 0.8]},
    'cutoff_distance': {
        'distribution': 'uniform',
        'min': 3,
        'max': 10,
    },
    'learning_rate': {
        'distribution': 'log_uniform_values',
        'min': 1e-8,
        'max': 1e-1,
    },
}

# Objective the sweep ranks trials by.
metric = {'name': 'Test Accuracy', 'goal': 'maximize'}

# Complete sweep definition: random search over parameters_dict.
sweep_config = {
    'method': 'random',
    'metric': metric,
    'parameters': parameters_dict,
}



In [5]:
# Pretty-print the assembled sweep configuration for a visual sanity check.
import pprint
pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Test Accuracy'},
 'parameters': {'cutoff_distance': {'distribution': 'uniform',
                                    'max': 10,
                                    'min': 3},
                'dropout': {'values': [0.2, 0.4, 0.5, 0.6, 0.8]},
                'hidden_channels': {'values': [32, 64, 128, 256]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.1,
                                  'min': 1e-08},
                'weight_decay': {'distribution': 'log_uniform_values',
                                 'max': 0.01,
                                 'min': 1e-08}}}


### Define Training Loop

In [6]:
# W&B project name; also used as a sub-directory name when sweep_run saves
# trained models.
project = "pnca-sweep-1"

In [7]:
# Uncomment the next line to register a new sweep from sweep_config and get a
# fresh ID back.
# sweep_id = wandb.sweep(sweep_config, project=project)
# Hard-coded ID of an existing sweep, handed to wandb.agent when launching.
# NOTE(review): while this ID is fixed, sweep_config is not sent to W&B by any
# visible cell, so edits to it presumably have no effect on the runs -- confirm.
sweep_id = '18ili8gf'

In [8]:
def sweep_run():
    """Train one model for a single W&B sweep trial and save it to disk.

    Intended as the callback given to ``wandb.agent``, hence no parameters.
    The trial's hyperparameters (``cutoff_distance``, ``hidden_channels``,
    ``learning_rate``, ``weight_decay``, ``dropout``) are read from
    ``run.config``; ``batch_size``, ``num_node_features``, ``epochs``,
    ``full_dataset`` and ``project`` are read from notebook globals.

    The object returned by ``run_model.pnca_simpleGCN`` is written with
    ``torch.save`` to ``saved_models/carter_ds_aug/<project>/<timestamp>``,
    creating the directory if it does not exist yet.

    Returns:
        None
    """
    with wandb.init() as run:
        config = run.config

        model = run_model.pnca_simpleGCN(
            # No sequences are passed; the pre-built dataset is supplied below.
            sequences=None,
            self_loops=False,
            cutoff_distance=config.cutoff_distance,
            # Alternative previously tried here: edge_weight_func='none'.
            edge_weight_func='1-(dist/cutoff)',
            batch_size=batch_size,
            num_node_features=num_node_features,
            hidden_channels=config.hidden_channels,
            learning_rate=config.learning_rate,
            wd=config.weight_decay,
            dropout=config.dropout,
            epochs=epochs,
            dataset=full_dataset,
            normalise_ews=True,
            # NOTE(review): the meaning of these flags is defined in
            # src.run_model; presumably 'sweep' makes training log to the run
            # opened above instead of starting its own -- confirm.
            wandb_params={
                'use_wandb': False,
                'sweep': True
            }
        )

        # torch.save does not create missing parent directories; without this
        # a completed training run would be lost to FileNotFoundError.
        save_dir = os.path.join('saved_models', 'carter_ds_aug', project)
        os.makedirs(save_dir, exist_ok=True)

        timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S")
        torch.save(model, os.path.join(save_dir, timestamp))

In [10]:
# Start an agent for the existing sweep: it invokes sweep_run once per trial
# and is capped at 200 trials by `count`.
wandb.agent(sweep_id, sweep_run, project = project, count=200)

[34m[1mwandb[0m: Agent Starting Run: 8adjzh0b with config:
[34m[1mwandb[0m: 	cutoff_distance: 6.763066638346833
[34m[1mwandb[0m: 	dropout: 0.5
[34m[1mwandb[0m: 	hidden_channels: 128
[34m[1mwandb[0m: 	learning_rate: 0.07635180251825581
[34m[1mwandb[0m: 	weight_decay: 3.18605477242148e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mdylan-home[0m. Use [1m`wandb login --relogin`[0m to force relogin


Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.
Epoch: 010, Train Acc: 0.5264, Test Acc: 0.5110, Train Loss: 0.6973, Test Loss: 0.7018
Epoch: 020, Train Acc: 0.4736, Test Acc: 0.4890, Train Loss: 0.7405, Test Loss: 0.7330
20 epochs passed without 0 test loss improvement. 
Early stopping triggered.


0,1
Test Accuracy,█▁█▁████▁█▁█▁█████▁▁█
Test F1,█▁█▁████▁█▁█▁█████▁▁█
Test Loss,▁▁█▃▁▁▂▄█▂▂▁▃▃▂▁▂▁▁▃▁
Test Sensitivity,█▁█▁████▁█▁█▁█████▁▁█
Test Specificity,▁█▁█▁▁▁▁█▁█▁█▁▁▁▁▁██▁
Train Accuracy,█▁█▁████▁█▁█▁█████▁▁█
Train F1,█▁█▁████▁█▁█▁█████▁▁█
Train Loss,▁▁▆▃▁▁▂▃█▁▂▁▃▂▂▁▂▁▁▄▁
Train Sensitivity,█▁█▁████▁█▁█▁█████▁▁█
Train Specificity,▁█▁█▁▁▁▁█▁█▁█▁▁▁▁▁██▁

0,1
Test Accuracy,0.511
Test F1,0.67637
Test Loss,0.69304
Test Sensitivity,1.0
Test Specificity,0.0
Train Accuracy,0.52643
Train F1,0.68975
Train Loss,0.69178
Train Sensitivity,1.0
Train Specificity,0.0
