# Run PncA WandB Sweep

In [1]:
from IPython.display import display
import os

# Show which environment the kernel is running in; the presence of
# SSH_CONNECTION in the environment is taken to mean a remote session.
display("Running via SSH" if "SSH_CONNECTION" in os.environ else "Running locally")
    
import sys
import os

# Make the repository root importable so that `from src import ...` resolves.
# The checkout location depends on whether the kernel runs in an SSH session
# or locally. Both locations are absolute, so they are used directly:
# prefixing them with '..' through os.path.join had no effect, because join
# discards earlier components when a later one is absolute.
# NOTE(review): these are machine-specific absolute paths; consider reading
# the location from an environment variable or installing `src` as a package.
if "SSH_CONNECTION" in os.environ:
    path = '/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn'
else:
    path = '/Users/dylandissanayake/Desktop/DPhil/Comp Disc/Repositories/TB-PNCA-GNN'

# Normalise once so the membership test and the appended entry are the same
# string; otherwise re-running the cell could append a duplicate entry.
path = os.path.abspath(path)
if path not in sys.path:
    sys.path.append(path)

import datetime
import random

import numpy as np
import pandas as pd
import pickle as pkl

import torch
from torch_geometric.data import Data

import wandb

import warnings
warnings.filterwarnings('ignore')

from src import run_model, protein_graph, gcn_model, evaluation

# Enable IPython's autoreload extension so edited modules are re-imported
# before each cell executes, without restarting the kernel.
%load_ext autoreload
%autoreload 2

# NOTE(review): in `%autoreload 2` mode all modules are already reloaded, so
# this explicit registration of `src` looks redundant — confirm before removing.
%aimport src

# Last expression of the cell: displays whether PyTorch can see a CUDA device.
torch.cuda.is_available()

'Running via SSH'



True

In [2]:
# Read the pickled graph dictionary from disk (it is indexed by 'train' and
# 'test' further down the notebook).
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
with open('datasets/singletons_af_graph_dict.pkl', 'rb') as graph_file:
    graph_dict = pkl.load(graph_file)

In [3]:
# Total number of entries across the train and test splits.
sum(len(graph_dict[split]) for split in ('train', 'test'))

664

### Set Up Params and Sweep Config

In [17]:
# Seed every random number generator this notebook relies on so that repeated
# runs start from the same state.
seed = 42
np.random.seed(seed)
random.seed(seed)
# PyTorch keeps its own generators; previously only NumPy and `random` were
# seeded here. torch.manual_seed sets the seed on all devices.
# NOTE(review): whether src.run_model re-seeds internally is not visible here.
torch.manual_seed(seed)

# --- Logging (only used for wandb metrics) ---
# Number of entries across both splits of the graph dictionary.
n_samples = sum(len(graph_dict[split]) for split in ('train', 'test'))

# --- GCN settings held constant for every sweep trial ---
# cutoff_distance, hidden_channels, learning_rate and weight decay are not
# fixed here; the sweep supplies them per trial. Values that used to be fixed:
#   cutoff_distance = 6.3, hidden_channels = 64, learning_rate = 0.001, wd = 5e-5
num_node_features = 12  # was 16 previously
batch_size, epochs = 256, 1500

# Reference sequence string (presumably the wild-type PncA protein — confirm).
wt_seq = 'MRALIIVDVQNDFCEGGSLAVTGGAALARAISDYLAEAADYHHVVATKDFHIDPGDHFSGTPDYSSSWPPHCVSGTPGADFHPSLDTSAIEAVFYKGAYTGAYSGFEGVDENGTPLLNWLRQRGVDEVDVVGIATDHCVRQTAEDAVRNGLATRVLVDLTAGVSADTTVAALEEMRTASVELVCS'

In [14]:
# First sweep: random search over the hyperparameters below.

# Metric the sweep is asked to maximise.
metric = {'name': 'Test Accuracy', 'goal': 'maximize'}

# Search space. Discrete options are listed under 'values'; continuous ranges
# give a 'distribution' together with 'min' / 'max' bounds.
parameters_dict = dict(
    hidden_channels=dict(values=[64, 128, 192, 256, 320, 384]),
    weight_decay=dict(distribution='log_uniform_values', min=1e-8, max=1e-2),
    dropout=dict(values=[0.2, 0.4, 0.5, 0.6, 0.8]),
    cutoff_distance=dict(distribution='uniform', min=4, max=12),
    learning_rate=dict(distribution='log_uniform_values', min=1e-8, max=1e-1),
)

# Assemble the full configuration; `metric` and `parameters_dict` are stored by
# reference, so later edits to them show up in `sweep_config` as well.
sweep_config = dict(method='random', metric=metric, parameters=parameters_dict)

# # Second / third sweep (narrower learning-rate range):

# parameters_dict.update({
#     'hidden_channels': {
#         'values': [64, 128, 192, 256, 320, 384]
#         },
#     'learning_rate': {
#         'distribution': 'log_uniform_values',
#         'min': 1e-5,
#         'max': 1e-2
#         },
#     })


In [15]:
import pprint

# Echo the assembled sweep configuration as a visual sanity check.
print(pprint.pformat(sweep_config))

{'method': 'random',
 'metric': {'goal': 'maximize', 'name': 'Test Accuracy'},
 'parameters': {'cutoff_distance': {'distribution': 'uniform',
                                    'max': 12,
                                    'min': 4},
                'dropout': {'values': [0.2, 0.4, 0.5, 0.6, 0.8]},
                'hidden_channels': {'values': [64, 128, 192, 256, 320, 384]},
                'learning_rate': {'distribution': 'log_uniform_values',
                                  'max': 0.1,
                                  'min': 1e-08},
                'weight_decay': {'distribution': 'log_uniform_values',
                                 'max': 0.01,
                                 'min': 1e-08}}}


### Define Training Loop

In [12]:
# W&B project name passed to wandb.sweep and wandb.agent below. Swap to the
# commented alternative to target the other project instead.
# project = "pnca-af-singletons-sweep-w-mutation-feats"
project = "pnca-af-singletons-sweep-NO-mutation-feats"

In [16]:
# Register the sweep with W&B, or re-attach to one that already exists.
# wandb.sweep creates a brand-new sweep every time this cell runs, so to
# continue an earlier sweep set `existing_sweep_id` to its ID rather than
# commenting lines in and out.
#
# Sweep IDs used previously:
#   project 1:    'x0k5kbpt', 'mnkuowkk', 'jp3xzyp5'
#   this project: 'fypk3i0y'
existing_sweep_id = None

if existing_sweep_id is None:
    sweep_id = wandb.sweep(sweep_config, project=project)
else:
    sweep_id = existing_sweep_id

Create sweep with ID: fypk3i0y
Sweep URL: https://wandb.ai/dylan-home/pnca-af-singletons-sweep-NO-mutation-feats/sweeps/fypk3i0y


In [9]:
# # Run a single run

# model = run_model.pnca_GCN_vary_graph(
#             self_loops = False,
#             cutoff_distance = 4,
#             edge_weight_func = '1-(dist/cutoff)',
#             batch_size = batch_size,
#             num_node_features = num_node_features,
#             hidden_channels = 64,
#             learning_rate = 1e-5,
#             wd = 1e-5,
#             dropout = 0.5,
#             lr_scheduling=False,
#             epochs = 50,
#             graph_dict= graph_dict,
#             normalise_ews=True,
#             # wandb_params={
#             #     'use_wandb': False,
#             #     'sweep': True
#             # }
#         )

In [18]:
def sweep_run():
    """Train one model for a single sweep trial.

    Opens a W&B run, reads the hyperparameters selected for this trial from
    ``run.config`` and passes them, together with the notebook-level fixed
    settings (``batch_size``, ``num_node_features``, ``epochs`` and
    ``graph_dict``), to ``run_model.pnca_GCN_vary_graph``.

    Takes no arguments and returns nothing; it is passed to ``wandb.agent``
    as the function to execute for each trial.
    """
    with wandb.init() as run:
        cfg = run.config

        # Values chosen by the sweep for this particular trial.
        swept = dict(
            cutoff_distance=cfg.cutoff_distance,
            hidden_channels=cfg.hidden_channels,
            learning_rate=cfg.learning_rate,
            wd=cfg.weight_decay,
            dropout=cfg.dropout,
        )

        # Settings that stay the same for every trial.
        # NOTE(review): the meaning of the wandb_params flags is defined in
        # src.run_model and is not visible from this notebook.
        fixed = dict(
            self_loops=False,
            edge_weight_func='1-(dist/cutoff)',
            batch_size=batch_size,
            num_node_features=num_node_features,
            lr_scheduling=False,
            epochs=epochs,
            graph_dict=graph_dict,
            normalise_ews=True,
            wandb_params={'use_wandb': False, 'sweep': True},
        )

        model = run_model.pnca_GCN_vary_graph(**fixed, **swept)

        # Saving the trained model is currently disabled:
        # os.makedirs(f'saved_models/carter_ds_aug/{project}/{sweep_id}', exist_ok=True)
        # torch.save(model, f'saved_models/carter_ds_aug/{project}/{sweep_id}/{run.name}')

In [19]:
# Start a sweep agent in this kernel: it calls `sweep_run` once per trial of
# the sweep identified by `sweep_id`, for at most 100 trials.
wandb.agent(
    sweep_id,
    function=sweep_run,
    project=project,
    count=100,
)

[34m[1mwandb[0m: Agent Starting Run: tyo9evb0 with config:
[34m[1mwandb[0m: 	cutoff_distance: 7.593719240317108
[34m[1mwandb[0m: 	dropout: 0.8
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 1.5501650541478128e-05
[34m[1mwandb[0m: 	weight_decay: 1.939450838252101e-05
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 7.593719240317108
Using CUDA
Early stopping enabled. Patience: 20. Min Delta: 0.


Traceback (most recent call last):
  File "/tmp/ipykernel_1249694/1967315548.py", line 6, in sweep_run
    model = run_model.pnca_GCN_vary_graph(
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/run_model.py", line 390, in pnca_GCN_vary_graph
    train_acc, test_acc, train_loss, test_loss = gcntrainer.run(epochs=epochs,
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/gcn_model.py", line 151, in run
    self.train()
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/gcn_model.py", line 67, in train
    out = self.model(data.x, data.edge_index, data.edge_attr, data.batch)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
    return forward_call(*args, **kwargs)
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/gcn_model.py

Run tyo9evb0 errored:
Traceback (most recent call last):
  File "/home/ubuntu/.local/lib/python3.10/site-packages/wandb/agents/pyagent.py", line 308, in _run_job
    self._function()
  File "/tmp/ipykernel_1249694/1967315548.py", line 6, in sweep_run
    model = run_model.pnca_GCN_vary_graph(
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/run_model.py", line 390, in pnca_GCN_vary_graph
    train_acc, test_acc, train_loss, test_loss = gcntrainer.run(epochs=epochs,
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/gcn_model.py", line 151, in run
    self.train()
  File "/mnt/alphafold-volume-1/dylan2/repos/tb-pnca-gnn/src/gcn_model.py", line 67, in train
    out = self.model(data.x, data.edge_index, data.edge_attr, data.batch)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ubuntu/.local/lib/python3.10/site-packages/torch/nn/modules/mod

[34m[1mwandb[0m: Agent Starting Run: s95q7wc1 with config:
[34m[1mwandb[0m: 	cutoff_distance: 6.416524195341323
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	hidden_channels: 320
[34m[1mwandb[0m: 	learning_rate: 0.0027578990112550146
[34m[1mwandb[0m: 	weight_decay: 3.147785384162237e-06
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Adjusting edge index and attaching edge weights for cutoff distance 6.416524195341323
