In [1]:
from argparse import ArgumentParser
from model.origgnn import MolecularGNN
from utils.package import plot_fit_confidence_bond
from pytorch_lightning import Trainer
import pytorch_lightning as pl
from collections import defaultdict
from torch.utils.data import DataLoader
from torch.utils.data import random_split
import torch
import numpy as np
import wandb
import time
from sklearn.metrics import r2_score
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

In [2]:

import os

# Credentials are read from the environment so that no API key is stored in the
# notebook. When WANDB_API_KEY is unset, key=None is passed and wandb resolves
# credentials on its own.
# NOTE(review): the key that was previously committed here should be rotated.
wandb.login(
    key=os.environ.get('WANDB_API_KEY'),
    host=os.environ.get('WANDB_BASE_URL', 'http://localhost:8080'),
)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhuabei[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for localhost to your netrc file: /home/huabei/.netrc


True

In [3]:
# %%wandb
def main(hparams):
    """Train a MolecularGNN and log train/validation fit quality to W&B.

    The model is built from every attribute of ``hparams``, fitted with early
    stopping and checkpointing on ``val_loss``, and then evaluated with
    ``trainer.test`` on its validation and training dataloaders. For each split
    an R^2 score and a fit plot are logged to the active W&B run.

    Args:
        hparams: Parsed arguments (as returned by ``parser.parse_args``) holding
            the options from ``prepare_arg`` plus the model-specific and Trainer
            arguments. ``hparams.checkpoint`` is a checkpoint path to resume
            from; ``None`` starts a fresh run.

    Side effects:
        Writes checkpoints under ``checkpoints/``, logs to W&B and finishes the
        active W&B run.
    """
    timestamp = time.strftime("%Y%m%d_%H%M%S", time.localtime())
    model_name = f'3dgnn-dim-{hparams.dim}-hlayer-{hparams.layer_hidden}-olayer-{hparams.layer_output}-' + timestamp
    model = MolecularGNN(**vars(hparams))

    wandb_logger = pl.loggers.WandbLogger(save_dir='log/origgnn')
    early_stopping = pl.callbacks.early_stopping.EarlyStopping(monitor='val_loss', patience=20, mode='min')
    checkpoint_callback = pl.callbacks.ModelCheckpoint(monitor='val_loss', mode='min', save_last=True,
                                                         dirpath='checkpoints', filename=model_name)
    callbacks = [early_stopping, checkpoint_callback]

    # Both branches build the Trainer from the parsed arguments so that a resumed
    # run honours the same Trainer options (--gpus, --max_epochs, ...), logger and
    # checkpointing as a fresh run.
    if hparams.checkpoint is None:
        trainer = Trainer.from_argparse_args(hparams, logger=wandb_logger, auto_lr_find=True,
                                             callbacks=callbacks)
    else:
        trainer = Trainer.from_argparse_args(hparams, logger=wandb_logger,
                                             resume_from_checkpoint=hparams.checkpoint,
                                             callbacks=callbacks)

    trainer.fit(model)

    val_r2, val_fig = _evaluate_split(trainer, model, model.val_dataloader())
    train_r2, train_fig = _evaluate_split(trainer, model, model.train_dataloader())

    # Kept as two separate log calls, matching the original logging sequence.
    wandb.log({'train_res': train_fig, 'val_res': val_fig})
    wandb.log({'val_r2': val_r2, 'train_r2': train_r2})
    wandb.finish()


def _evaluate_split(trainer, model, dataloader):
    """Run a test pass over ``dataloader`` and return ``(r2, figure)`` for it."""
    # Start from an empty store so the arrays below hold only this pass's results.
    # NOTE(review): assumes the model's test hooks fill
    # model.predictions['true'] / ['pred'] -- confirm in MolecularGNN.
    model.predictions = defaultdict(list)
    trainer.test(model, dataloaders=dataloader, verbose=False)
    true_values = np.array(model.predictions['true'])
    predicted_values = np.array(model.predictions['pred'])
    score = r2_score(true_values, predicted_values)
    figure = plot_fit_confidence_bond(true_values, predicted_values, score, annot=False)
    return score, figure

def prepare_arg():
    """Create the base command-line parser shared by all training entry points.

    Returns:
        ArgumentParser: a parser exposing the integer options ``--dim``,
        ``--layer_hidden``, ``--layer_output`` and ``--batch_size``, and the
        string options ``--data_path`` and ``--checkpoint`` (both default to
        ``None``).
    """
    # (flag, value type, default) for every option, in registration order.
    option_specs = (
        ("--dim", int, 512),
        ("--layer_hidden", int, 24),
        ("--layer_output", int, 8),
        ("--batch_size", int, 128),
        ("--data_path", str, None),
        ("--checkpoint", str, None),
    )
    cli = ArgumentParser()
    for flag, value_type, fallback in option_specs:
        cli.add_argument(flag, type=value_type, default=fallback)
    return cli

  rank_zero_warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name       | Type       | Params
------------------------------------------
0 | embed_atom | Embedding  | 4.1 K 
1 | gamma      | ModuleList | 192   
2 | W_atom     | ModuleList | 6.3 M 
3 | W_output   | ModuleList | 2.1 M 
4 | W_property | Linear     | 513   
------------------------------------------
8.4 M     Trainable params
0         Non-trainable params
8.4 M     Total params
33.639    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

In [None]:

if __name__ == "__main__":
    # Single training run with fixed hyperparameters, tracked in the '3dgnn' W&B project.
    project = '3dgnn'
    wandb.init(dir='log/origgnn', save_code=True, project=project)

    dataset_path = '/home/huabei/Projects/SMTarRNA/project/data/in_man_exhaustiveness_96_orig_conformation.txt'
    # Not forwarded to the parser below, so hparams.checkpoint keeps its default of None.
    checkpoint = '20220618_211314.ckpt'

    # The parser is layered: base options, then the model's own, then the Trainer's.
    parser = Trainer.add_argparse_args(
        MolecularGNN.add_model_specific_args(parent_parser=prepare_arg())
    )
    args = parser.parse_args([
        '--data_path', dataset_path,
        '--learning_rate', '0.0001',
        '--gpus=1',
        '--max_epochs', '1000',
    ])

    main(args)

# wandb sweep

In [6]:
# Random-search sweep over learning rate and network size.
# NOTE(review): every key under `parameters` is forwarded by arg_for_sweep as a
# `--<key>` flag; confirm the parser defines `--epochs` (the Trainer flag used
# elsewhere in this notebook is `--max_epochs`).
sweep_config = dict(
    name="sweep",
    method="random",
    parameters=dict(
        epochs=dict(value=500),
        learning_rate=dict(distribution="log_uniform_values", min=0.00001, max=0.1),
        dim=dict(distribution="int_uniform", min=128, max=512),
        layer_hidden=dict(distribution="int_uniform", min=8, max=32),
        layer_output=dict(distribution="int_uniform", min=8, max=20),
    ),
)

# Register the sweep with the W&B server; the returned id is what the agent consumes.
sweep_id = wandb.sweep(sweep_config, project='origgnn_sweep')

Create sweep with ID: hibrotxa
Sweep URL: http://localhost:8080/huabei/origgnn_sweep/sweeps/hibrotxa


In [7]:
def arg_for_sweep(config: dict):
    """Translate a W&B sweep configuration into parsed training arguments.

    Each ``key: value`` pair in ``config`` is turned into a ``--key value``
    command-line pair and parsed by the same layered parser (base, model-specific
    and Trainer options) used for a regular run, together with the fixed dataset
    path and ``--gpus=1``.

    Args:
        config: Mapping of hyperparameter name to value, e.g. ``wandb.config``.
            Every key must correspond to an option known to the parser.

    Returns:
        The namespace produced by ``parser.parse_args``, suitable for ``main``.
    """
    dataset_path = '/home/huabei/Projects/SMTarRNA/project/data/in_man_exhaustiveness_96_orig_conformation.txt'

    parser = prepare_arg()
    # add model args
    parser = MolecularGNN.add_model_specific_args(parent_parser=parser)
    # add Trainer args
    parser = Trainer.add_argparse_args(parser)

    hyperparameter_list = ['--data_path', dataset_path, '--gpus=1']
    for key, value in config.items():
        # argparse only accepts strings, while sweep values arrive as int/float;
        # passing them unconverted raises TypeError inside parse_args.
        hyperparameter_list.extend([f'--{key}', str(value)])

    return parser.parse_args(hyperparameter_list)

def train():
    """Entry point for one sweep trial, invoked by the W&B agent.

    Opens a W&B run (logging under ``log/origgnn``), converts the run's sweep
    configuration into parsed arguments and hands them to ``main``.
    """
    with wandb.init(dir='log/origgnn'):
        sweep_args = arg_for_sweep(config=wandb.config)
        main(sweep_args)

In [8]:
# Number of sweep trials this agent executes before stopping.
count = 5
wandb.agent(sweep_id=sweep_id, count=count, function=train)

[34m[1mwandb[0m: Agent Starting Run: qhasqaz9 with config:
[34m[1mwandb[0m: 	dim: 363
[34m[1mwandb[0m: 	epochs: 500
[34m[1mwandb[0m: 	layer_hidden: 21
[34m[1mwandb[0m: 	layer_output: 15
[34m[1mwandb[0m: 	learning_rate: 0.014351224670118002
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.045 MB of 0.045 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run qhasqaz9 errored: NameError("name 'prepare_arg' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run qhasqaz9 errored: NameError("name 'prepare_arg' is not defined")
[34m[1mwandb[0m: Agent Starting Run: s8az5lwp with config:
[34m[1mwandb[0m: 	dim: 286
[34m[1mwandb[0m: 	epochs: 500
[34m[1mwandb[0m: 	layer_hidden: 28
[34m[1mwandb[0m: 	layer_output: 10
[34m[1mwandb[0m: 	learning_rate: 0.00011209028064643587
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.045 MB of 0.045 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run s8az5lwp errored: NameError("name 'prepare_arg' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run s8az5lwp errored: NameError("name 'prepare_arg' is not defined")
[34m[1mwandb[0m: Agent Starting Run: wobh1jtv with config:
[34m[1mwandb[0m: 	dim: 265
[34m[1mwandb[0m: 	epochs: 500
[34m[1mwandb[0m: 	layer_hidden: 12
[34m[1mwandb[0m: 	layer_output: 11
[34m[1mwandb[0m: 	learning_rate: 0.0008671401087566827
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='0.045 MB of 0.045 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run wobh1jtv errored: NameError("name 'prepare_arg' is not defined")
[34m[1mwandb[0m: [32m[41mERROR[0m Run wobh1jtv errored: NameError("name 'prepare_arg' is not defined")
Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 3 failed runs in the first 60 seconds, killing sweep.
[34m[1mwandb[0m: To disable this check set WANDB_AGENT_DISABLE_FLAPPING=true
