### Experiment Tracking with W&B

- config: stores hyperparameters (hp) and metadata for each run
- wandb.init: starts a new run
- wandb.watch: logs model gradients and parameters over time (helps detect bugs, e.g. weird gradient behaviour)
- wandb.log: logs the values we care about (e.g. metrics)
- wandb.save: saves files online

Use `wandb.init` inside a `with` block (context-manager syntax).

In [None]:
import wandb
# Log in to Weights & Biases once, before any run is started with wandb.init.
wandb.login()

In [None]:
# Run configuration: number of epochs plus the validation / test split ratios.
config = {
    "epochs": 50,
    "val_ratio": 0,
    "test_ratio": 0.2,
}

In [None]:
def _load_split_data(base_path, geo_file, val_ratio, test_ratio):
    """Load one 'individual' ReactionDataset and split its edges with train_test_split_edges."""
    data = ReactionDataset(base_path, geo_file = geo_file, dataset_type = 'individual').data
    # clear the node masks and labels before the edge split
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    return train_test_split_edges(data = data, val_ratio = val_ratio, test_ratio = test_ratio)


def make(base_path, val_ratio, test_ratio, encode_data_name, decode_data_name, latent_dim):
    """Build the GAE, its optimiser and the edge-split encode / decode data.

    Args:
        base_path: root directory handed to ReactionDataset.
        val_ratio: validation ratio passed to train_test_split_edges.
        test_ratio: test ratio passed to train_test_split_edges.
        encode_data_name: geo_file of the dataset that is encoded.
        decode_data_name: geo_file of the dataset that is decoded.
        latent_dim: second argument given to MolEncoder.

    Returns:
        Tuple ``(gae, opt, encode_data, decode_data)``.
    """
    # TODO: move the node features, training edges and model to the GPU device
    # when available; nothing is moved to a device here yet.

    # dataset to encode
    encode_data = _load_split_data(base_path, encode_data_name, val_ratio, test_ratio)

    # dataset to decode
    decode_data = _load_split_data(base_path, decode_data_name, val_ratio, test_ratio)

    # model creation
    gae = GAE(MolEncoder(encode_data.num_node_features, latent_dim))
    opt = torch.optim.Adam(gae.parameters(), lr = 0.01)

    return gae, opt, encode_data, decode_data

In [None]:
def model_pipeline(hps):
    """Start a W&B run configured with ``hps`` and build the model, optimiser and data.

    Args:
        hps: hyperparameters passed to ``wandb.init`` as the run config.
            ``val_ratio`` and ``test_ratio`` are read back from the run config
            when present; otherwise 0 and 0.2 are used.
    """

    # start wandb
    with wandb.init(project="test", config=hps):

        # access hps through wandb.config so logging matches execution
        config = wandb.config

        # split ratios come from the logged config; the fallbacks are the
        # values that used to be hard-coded in the call below
        val_ratio = config.get('val_ratio', 0)
        test_ratio = config.get('test_ratio', 0.2)

        # make model, data, opt problem
        # NOTE(review): the objects built here are not used or returned yet
        ts_r_gae, ts_r_opt, r_data, ts_data = make(r'data/', val_ratio, test_ratio, 'train_r', 'train_ts', 2)

### Testing GAEs

In [1]:
# data processing
from ts_vae.data_processors.grambow_processor import ReactionDataset

# my GAEs
from ts_vae.gaes.n_gae import Node_AE, train_node_ae, test_node_ae
from ts_vae.gaes.ne_gae import NodeEdge_AE, train_ne_ae, test_ne_ae
from ts_vae.gaes.nec_gae import NodeEdgeCoord_AE, train_nec_ae, test_nec_ae, main

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F

# torch geometric
from torch_geometric.data import DataLoader
from torch_geometric.utils import to_dense_adj

# other
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

In [2]:
# remove processed files
import os
import glob

# Delete regular files only: os.remove raises on a directory, so a sub-directory
# under data/processed is skipped rather than aborting the clean-up.
files = [f for f in glob.glob(r'data/processed/*') if os.path.isfile(f)]
for f in files:
    os.remove(f)

In [3]:
# Load the reaction dataset and pull out the reactant / transition-state / product collections.
rxns = ReactionDataset(r'data')
reactants, transition_states, products = rxns.data.r, rxns.data.ts, rxns.data.p

# (an earlier variant built single loaders over rxns with follow_batch = ['r', 'p'])

# the first train_ratio share of the reactions is used for training, the rest for testing
train_ratio = 0.8
batch_size = 20
num_rxns = len(rxns)
num_train = int(np.floor(train_ratio * num_rxns))

# need to be able to recover original reactants after encoding
# note: no padding, since PyG automatically factors this in
species = {'r': reactants, 'ts': transition_states, 'p': products}

train_loaders = {key: DataLoader(mols[: num_train], batch_size) for key, mols in species.items()}

test_loaders = {key: DataLoader(mols[num_train: ], batch_size) for key, mols in species.items()}

Processing...


 15%|█▍        | 1000/6739 [00:02<00:16, 350.72it/s]
100%|██████████| 842/842 [00:00<00:00, 1062.10it/s]
 15%|█▍        | 1000/6739 [00:01<00:09, 578.16it/s]
100%|██████████| 842/842 [00:00<00:00, 1158.25it/s]
 15%|█▍        | 1000/6739 [00:02<00:12, 471.32it/s]
100%|██████████| 842/842 [00:02<00:00, 341.44it/s]


Done!


In [4]:
### Node AE
train_r_dataset = train_loaders['r'].dataset
max_num_nodes = max(mol.z.size(0) for mol in train_r_dataset)

# feature sizes are read from the first reactant; all reactants must agree on the node feature size
first_r = train_r_dataset[0]
num_node_fs = first_r.x.size(1)
assert all(mol.x.size(1) == num_node_fs for mol in train_r_dataset)
num_edge_fs = first_r.edge_attr.size(1)
h_nf, emb_nf = 5, 2

# in_node_nf + in_edge_nf >= h_nf >= out_nf > emb_nf
node_ae = Node_AE(
    in_node_nf = num_node_fs,
    in_edge_nf = num_edge_fs,
    h_nf = h_nf,
    out_nf = h_nf,
    emb_nf = emb_nf,
)
node_opt = torch.optim.Adam(node_ae.parameters(), lr = 1e-3)

# single train / test pass for now; the epoch loop is added later
train_loss, train_res = train_node_ae(node_ae, node_opt, train_loaders['r'])
# NOTE(review): unlike test_ne_ae / test_nec_ae, this call also passes the optimiser — confirm the signature
test_loss, test_res = test_node_ae(node_ae, node_opt, test_loaders['r'])

In [5]:
### NodeEdge AE
train_r_dataset = train_loaders['r'].dataset
max_num_nodes = max(mol.z.size(0) for mol in train_r_dataset)

# feature sizes are read from the first reactant; all reactants must agree on the node feature size
first_r = train_r_dataset[0]
num_node_fs = first_r.x.size(1)
assert all(mol.x.size(1) == num_node_fs for mol in train_r_dataset)
num_edge_fs = first_r.edge_attr.size(1)
h_nf, emb_nf = 5, 2

# model and opt
ne_ae = NodeEdge_AE(
    in_node_nf = num_node_fs,
    in_edge_nf = num_edge_fs,
    h_nf = h_nf,
    out_nf = h_nf,
    emb_nf = emb_nf,
)
ne_opt = torch.optim.Adam(ne_ae.parameters(), lr = 1e-3)

# one training pass followed by one test pass
train_loss, train_res = train_ne_ae(ne_ae, ne_opt, train_loaders['r'])
test_loss, test_res = test_ne_ae(ne_ae, test_loaders['r'])

In [6]:
### NodeEdgeCoord AE
train_r_dataset = train_loaders['r'].dataset
max_num_nodes = max(mol.z.size(0) for mol in train_r_dataset)

# feature sizes are read from the first reactant; all reactants must agree on the node feature size
first_r = train_r_dataset[0]
num_node_fs = first_r.x.size(1)
assert all(mol.x.size(1) == num_node_fs for mol in train_r_dataset)
num_edge_fs = first_r.edge_attr.size(1)
h_nf, emb_nf = 5, 2

# model and opt
nec_ae = NodeEdgeCoord_AE(
    in_node_nf = num_node_fs,
    in_edge_nf = num_edge_fs,
    h_nf = h_nf,
    out_nf = h_nf,
    emb_nf = emb_nf,
)
nec_opt = torch.optim.Adam(nec_ae.parameters(), lr = 1e-3)

# one training pass followed by one test pass
train_loss, train_res = train_nec_ae(nec_ae, nec_opt, train_loaders['r'])
test_loss, test_res = test_nec_ae(nec_ae, test_loaders['r'])

In [13]:
# NodeEdge Model

epochs = 20
test_interval = 5  # run the test set every test_interval epochs

# History of the run. 'epochs' holds the epoch number of each entry in the
# train_* arrays; the test_* arrays get one entry every test_interval epochs.
final_res = {'epochs': [], 'train_loss_arr': [], 'train_res_arr': [], 
             'test_loss_arr': [], 'test_res_arr': [], 'best_test': 1e10, 'best_epoch': 0}

# NOTE: the model is not re-initialised here (reset call left disabled), so
# training continues from ne_ae's current parameters.
# r_ae.reset_parameters()

for epoch in range(1, epochs + 1):
    
    train_loss, train_res = train_ne_ae(ne_ae, ne_opt, train_loaders['r'])
    # record the epoch number so the stored losses can be aligned with epochs
    final_res['epochs'].append(epoch)
    final_res['train_loss_arr'].append(train_loss)
    final_res['train_res_arr'].append(train_res)
    print(f"===== Training epoch {epoch:03d} complete with loss: {train_loss:.4f} ====")
    
    if epoch % test_interval == 0:
    
        test_loss, test_res = test_ne_ae(ne_ae, test_loaders['r'])
        final_res['test_loss_arr'].append(test_loss)
        final_res['test_res_arr'].append(test_res)
        print(f'===== Testing epoch: {epoch:03d}, Loss: {test_loss:.4f} ===== \n')
        
        # keep track of the lowest test loss seen so far and when it occurred
        if test_loss < final_res['best_test']:
            final_res['best_test'] = test_loss
            final_res['best_epoch'] = epoch

===== Training epoch 001 complete with loss: 2.9641 ====
===== Training epoch 002 complete with loss: 3.0421 ====
===== Training epoch 003 complete with loss: 3.0840 ====
===== Training epoch 004 complete with loss: 3.0168 ====
===== Training epoch 005 complete with loss: 2.9672 ====
===== Testing epoch: 005, Loss: 2.8587 ===== 

===== Training epoch 006 complete with loss: 2.9764 ====
===== Training epoch 007 complete with loss: 3.0892 ====
===== Training epoch 008 complete with loss: 3.0578 ====
===== Training epoch 009 complete with loss: 3.0092 ====
===== Training epoch 010 complete with loss: 2.9911 ====
===== Testing epoch: 010, Loss: 2.7333 ===== 

===== Training epoch 011 complete with loss: 2.9480 ====
===== Training epoch 012 complete with loss: 2.9414 ====
===== Training epoch 013 complete with loss: 2.9321 ====
===== Training epoch 014 complete with loss: 3.0139 ====
===== Training epoch 015 complete with loss: 3.0026 ====
===== Testing epoch: 015, Loss: 3.0517 ===== 

====

In [None]:
# NodeEdgeCoord Model

epochs = 20
test_interval = 5  # run the test set every test_interval epochs

# History of the run. 'epochs' holds the epoch number of each entry in the
# train_* arrays; the test_* arrays get one entry every test_interval epochs.
final_res = {'epochs': [], 'train_loss_arr': [], 'train_res_arr': [], 
             'test_loss_arr': [], 'test_res_arr': [], 'best_test': 1e10, 'best_epoch': 0}

# NOTE: the model is not re-initialised here (reset call left disabled), so
# training continues from nec_ae's current parameters.
# r_ae.reset_parameters()

for epoch in range(1, epochs + 1):
    
    train_loss, train_res = train_nec_ae(nec_ae, nec_opt, train_loaders['r'])
    # record the epoch number so the stored losses can be aligned with epochs
    final_res['epochs'].append(epoch)
    final_res['train_loss_arr'].append(train_loss)
    final_res['train_res_arr'].append(train_res)
    print(f"===== Training epoch {epoch:03d} complete with loss: {train_loss:.4f} ====")
    
    if epoch % test_interval == 0:
    
        test_loss, test_res = test_nec_ae(nec_ae, test_loaders['r'])
        final_res['test_loss_arr'].append(test_loss)
        final_res['test_res_arr'].append(test_res)
        print(f'===== Testing epoch: {epoch:03d}, Loss: {test_loss:.4f} ===== \n')
        
        # keep track of the lowest test loss seen so far and when it occurred
        if test_loss < final_res['best_test']:
            final_res['best_test'] = test_loss
            final_res['best_epoch'] = epoch

In [11]:
# final_res['train_res_arr']  (alternative: inspect the training results instead)
final_res['test_res_arr']

# Author's observations on the per-component losses:
# - node_loss > 1.7, usually > 2, but gets worse...
# - adj_loss is always 0.3
# - edge_loss goes from 0.8 -> 0.1
# - coord_loss = 0 always...

[{'total_loss': 1057.0986795425415,
  'counter': 369,
  'total_loss_arr': [3.0813746452331543,
   2.6163179874420166,
   3.012462854385376,
   2.5098307132720947,
   2.5540337562561035,
   2.899033308029175,
   3.2690751552581787,
   2.8928704261779785,
   2.990562677383423,
   3.501192331314087,
   2.8398962020874023,
   2.6030735969543457,
   2.548585891723633,
   2.960947036743164,
   2.4626095294952393,
   2.971714973449707,
   2.8775155544281006,
   2.8694534301757812,
   3.098630905151367],
  'coord_loss_arr': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0],
  'node_recon_loss_arr': [1.9208110570907593,
   1.7394442558288574,
   2.1114180088043213,
   2.0683984756469727,
   1.9450640678405762,
   2.4605486392974854,
   2.156846523284912,
   2.2254295349121094,
   2.4294798374176025,
   2.4254672527313232,
   2.4140782356262207,
   2.1779332160949707,
   1.9332128763198853,
   2.4

Convert an MLP to a GNN by swapping torch.nn.Linear with PyG's GNN operators, e.g. a GCN layer.
Lucky's work
- PairFeatures: a manual message-passing (MP) step, I think. It has to be; otherwise what he's doing isn't a GNN.
- set edges: iterate: 
    - compute features (i.e. MP) -> MLP(features) -> update edges
    - compute features (i.e. MP) -> MLP(MLP(edges)) -> update vertices

Loose notes
- Can define data class for parameters e.g. 
    - @dataclass
      class GNNParams:
        input_dim: int
        output_dim: int
        ... (hidden_sizes, dropout, batchnorm, activation)