### Experiment Tracking with W&B

- config: store hyperparameters (hp) and metadata for each run
- wandb.init
- wandb.watch: log model gradients and params over time (helps detect bugs e.g. weird grad behaviour)
- wandb.log: log stuff we care about
- wandb.save: save online

Use `wandb.init` inside a `with` block (context-manager syntax), as `model_pipeline` below does.

In [None]:
import wandb
# Log this session in to Weights & Biases before any run is started.
wandb.login()

In [None]:
# Hyperparameters for a run: number of epochs and the edge-split ratios.
config = {
    "epochs": 50,
    "val_ratio": 0,
    "test_ratio": 0.2,
}

In [None]:
def _load_split_data(base_path, geo_file, val_ratio, test_ratio):
    """Load one 'individual' ReactionDataset and run the edge split on its data."""
    data = ReactionDataset(base_path, geo_file=geo_file, dataset_type='individual').data
    # Reset the node-level masks and labels before splitting edges.
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    return train_test_split_edges(data=data, val_ratio=val_ratio, test_ratio=test_ratio)


def make(base_path, val_ratio, test_ratio, encode_data_name, decode_data_name, latent_dim):
    """Build the graph autoencoder, its optimiser and the two edge-split datasets.

    Args:
        base_path: root directory handed to ``ReactionDataset``.
        val_ratio: validation ratio forwarded to ``train_test_split_edges``.
        test_ratio: test ratio forwarded to ``train_test_split_edges``.
        encode_data_name: ``geo_file`` of the dataset to encode.
        decode_data_name: ``geo_file`` of the dataset to decode.
        latent_dim: second argument of ``MolEncoder`` (presumably the
            embedding size -- confirm against ``MolEncoder``).

    Returns:
        ``(gae, opt, encode_data, decode_data)``. The data objects are returned
        as produced by the split; nothing is moved to a device here.
    """
    # TODO: move features / edge indices (and the model) to the target device
    # at the point of use. The previous version created device copies of the
    # tensors here but never used or returned them, so they were dropped.

    # Encode first, then decode: keeps the original order of the two splits.
    encode_data = _load_split_data(base_path, encode_data_name, val_ratio, test_ratio)
    decode_data = _load_split_data(base_path, decode_data_name, val_ratio, test_ratio)

    # model creation: encoder input width comes from the encode dataset
    gae = GAE(MolEncoder(encode_data.num_node_features, latent_dim))
    opt = torch.optim.Adam(gae.parameters(), lr=0.01)

    return gae, opt, encode_data, decode_data

In [None]:
def model_pipeline(hps):
    """Start a W&B run configured with ``hps`` and build model, optimiser and data.

    Args:
        hps: hyperparameter mapping passed to ``wandb.init(config=...)``.
            ``val_ratio`` and ``test_ratio`` are read back from
            ``wandb.config``; when absent they default to 0 and 0.2, the
            values that used to be hard-coded here.

    Returns:
        None. The objects returned by ``make`` are not used yet (training is
        still to be added).
    """
    # start wandb; the run is used as a context manager around the pipeline
    with wandb.init(project="test", config=hps):

        # access hps through wandb.config so logging matches execution
        config = wandb.config

        # Take the split ratios from the logged config instead of literals,
        # so what is executed is what is recorded for the run.
        val_ratio = config.get('val_ratio', 0)
        test_ratio = config.get('test_ratio', 0.2)

        # make model, data, opt problem
        ts_r_gae, ts_r_opt, r_data, ts_data = make(
            r'data/', val_ratio, test_ratio, 'train_r', 'train_ts', 2
        )

### Testing GAEs

In [8]:
from ts_vae.gae import EGNN, EGNN_NEC, MolGraph_AE
from ts_vae.layers import GCL_PYG
from ts_vae.data_processors.new_pyg_processor import ReactionDataset
from ts_vae.simple_gaes.node_gae_act import Node_AE

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import DataLoader

# should double check this func works okay
from torch_geometric.utils import to_dense_adj


import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

In [9]:
# Clear out data/processed/ by deleting every path the glob matches
# (presumably so the dataset is re-processed on the next load).
import glob
import os

files = glob.glob(r'data/processed/*')
for stale_path in files:
    os.remove(stale_path)

## New try

In [10]:
# Load the reaction dataset and pull out the three per-species collections.
rxns = ReactionDataset(r'data')
reactants, transition_states, products = rxns.data.r, rxns.data.ts, rxns.data.p

# Alternative kept for reference: one loader over `rxns` itself, e.g.
# DataLoader(rxns[:num_train], batch_size=2, follow_batch=['r', 'p']).

# Positional split: the first `num_train` reactions train, the rest test.
train_ratio = 0.8
batch_size = 2
num_rxns = len(rxns)
num_train = int(np.floor(train_ratio * num_rxns))

# One loader per species so the original reactants can be recovered after
# encoding. No padding is applied here (per the original note, PyG handles
# variable graph sizes itself).
species = {'r': reactants, 'ts': transition_states, 'p': products}

train_loaders = {
    key: DataLoader(graphs[:num_train], batch_size)
    for key, graphs in species.items()
}
test_loaders = {
    key: DataLoader(graphs[num_train:], batch_size)
    for key, graphs in species.items()
}

Processing...


  0%|          | 30/6739 [00:00<00:08, 750.46it/s]
  4%|▎         | 30/842 [00:00<00:03, 220.83it/s]
  0%|          | 30/6739 [00:00<00:30, 216.69it/s]
  4%|▎         | 30/842 [00:00<00:02, 331.62it/s]
  0%|          | 30/6739 [00:00<00:27, 244.57it/s]
  4%|▎         | 30/842 [00:00<00:01, 469.09it/s]


Done!


In [11]:
# Derive model dimensions from the test reactants and build the node autoencoder.
test_reactants = test_loaders['r'].dataset

# Largest graph in the set, measured by the length of `z`.
max_num_nodes = max(mol.z.size(0) for mol in test_reactants)

# Every graph must share one node-feature width.
node_feat_widths = {mol.x.size(1) for mol in test_reactants}
assert len(node_feat_widths) == 1

num_node_fs = test_reactants[0].x.size(1)
num_edge_fs = test_reactants[0].edge_attr.size(1)
h_nf, emb_nf = 5, 2

# intended ordering: in_node_nf + in_edge_nf >= h_nf >= out_nf > emb_nf
node_ae = Node_AE(
    in_node_nf=num_node_fs,
    in_edge_nf=num_edge_fs,
    h_nf=h_nf,
    out_nf=h_nf,
    emb_nf=emb_nf,
)
opt = torch.optim.Adam(node_ae.parameters(), lr=1e-3)

In [157]:
# Scratch version of an adjacency decoder built from pairwise squared
# distances between node embeddings.
#
# Inner-product alternative, for reference:
# value = (z[edge_index[0]] * z[edge_index[1]]).sum(dim=1)
#
# Indexing with edge_index gives one row per edge, not per node:
# x[edge_index[0]].shape = [num_edges, emb_dim]
# (x[edge_index[0]] * x[edge_index[1]]).shape = [num_edges, emb_dim]
# (x[edge_index[0]] * x[edge_index[1]]).sum(dim=1).shape = [num_edges]

x = node_emb

x_a = x.unsqueeze(0)  # dim: [1, num_nodes, emb_dim]
x_b = torch.transpose(x_a, 0, 1)  # dim: [num_nodes, 1, emb_dim] (swap dims 0 and 1)

# Broadcasting the difference yields every node pair.
X = (x_a - x_b) ** 2  # dim: [num_nodes, num_nodes, emb_dim]

num_nodes = x.size(0)  # number of nodes (usually all nodes in the batch)
X = X.view(num_nodes ** 2, -1)  # dim: [num_nodes^2, emb_dim], flattened to sum per pair

# Sum over dim=1 gives one value per pair, dim: [num_nodes^2]. With
# linear_sig an affine map followed by a sigmoid turns it into a
# probabilistic adjacency matrix; otherwise the raw sums are kept.
X = torch.sigmoid(W * torch.sum(X, dim=1) + b) if linear_sig else torch.sum(X, dim=1)

adj_pred = X.view(num_nodes, num_nodes)  # dim: [num_nodes, num_nodes]
# Zero the diagonal (no self-loops). The mask is built on the embeddings'
# own device: there is no `self` at cell level to read a device from.
adj_pred = adj_pred * (1 - torch.eye(num_nodes, device=x.device))



torch.Size([30, 2])

In [15]:
from sklearn.metrics import roc_auc_score, average_precision_score
import torch.nn.functional as F

### test on single batch

# Take the first batch of test reactants and unpack the fields used below.
r_batch = next(iter(test_loaders['r']))
node_feats = r_batch.x
edge_index = r_batch.edge_index
edge_attr = r_batch.edge_attr
batch_vec = r_batch.batch

# Forward pass: node embeddings plus the predicted adjacency matrix.
node_emb, adj_pred = node_ae(node_feats, edge_index, edge_attr)

# Ground-truth adjacency over the whole batch as a single graph. Original
# observation: passing batch_vec gives two matrices, and passing edge_attr
# adds a trailing dimension of size 4, so neither is passed here.
adj_gt = to_dense_adj(edge_index).squeeze(0)

assert adj_gt.shape == adj_pred.shape, "Your adjacency matrices don't have the same shape!"

# Other metrics tried on detached numpy copies: roc_auc_score and
# average_precision_score. BCE is the loss in use.
loss = F.binary_cross_entropy(input=adj_pred, target=adj_gt)
loss

tensor(0.3936, grad_fn=<BinaryCrossEntropyBackward>)

In [158]:
# decode to adj of number of node features you have
# how to compare adj matrices?

# R->R: node ae, train
# R->R: node + edge ae, train
# R->R: node + edge + coords, train
# same for R->TS, P->TS, (R,P)->TS

# look at DGL for improvements: k-hop graph func + khop adj util func

Batch(batch=[15], edge_attr=[30, 4], edge_index=[2, 30], idx=[1], pos=[15, 3], ptr=[2], x=[15, 11], z=[15])

In [12]:
### test on single batch

# Module-form binary cross-entropy criterion. The training loop in this cell
# computes its loss with F.binary_cross_entropy rather than through `obj`.
obj = nn.BCELoss()

def train_node_ae(node_ae, opt, loader):
    """Run one optimisation pass of ``node_ae`` over ``loader``.

    Args:
        node_ae: model called as ``node_ae(x, edge_index, edge_attr)`` and
            returning ``(node_emb, adj_pred)``.
        opt: optimiser stepping ``node_ae``'s parameters.
        loader: iterable of batches exposing ``x``, ``edge_index``,
            ``edge_attr`` and ``idx``.

    Returns:
        The batch BCE losses averaged with weight ``len(batch.idx)``
        (presumably the number of graphs per batch), or ``nan`` when
        ``loader`` yields no batches.
    """
    # TODO: experiment dataclass recording loss, epoch, batch_size
    total_loss = 0.0
    num_seen = 0

    # Train mode does not change between batches, so set it once.
    node_ae.train()

    for rxn_batch in loader:
        opt.zero_grad()

        # predicted adjacency matrix; the embeddings are not needed for the loss
        node_feats, edge_index, edge_attr = rxn_batch.x, rxn_batch.edge_index, rxn_batch.edge_attr
        _, adj_pred = node_ae(node_feats, edge_index, edge_attr)

        # Ground truth over the whole batch as a single graph. Original
        # observation: adding the batch vector gives two matrices, and
        # adding edge_attr a trailing dim of 4.
        adj_gt = to_dense_adj(edge_index=edge_index).squeeze(dim=0)
        assert adj_gt.shape == adj_pred.shape, "Your adjacency matrices don't have the same shape!"

        # calculate loss for adj and step
        loss = F.binary_cross_entropy(adj_pred, adj_gt)
        loss.backward()
        opt.step()

        # record batch results, weighted by the batch's idx count
        num_in_batch = len(rxn_batch.idx)
        total_loss += loss.item() * num_in_batch
        num_seen += num_in_batch

    if num_seen == 0:
        # Nothing was processed: the mean is undefined, so report nan
        # instead of dividing by zero.
        return float('nan')
    return total_loss / num_seen

# NOTE(review): this runs the training pass on the *test* reactant loader --
# presumably a quick smoke test of the loop; confirm before reusing the result.
train_node_ae(node_ae, opt, test_loaders['r'])

TypeError: 'int' object is not callable

In [None]:
# first test with reactants

def train_mg_ae(gae, opt):
    """Unfinished training loop for ``gae`` over the global ``test_loaders['r']``.

    NOTE(review): work in progress. As written the first batch fails, because
    ``loss`` is the int ``0`` and has no ``backward``. The function ends in
    ``pass`` and returns None.

    Args:
        gae: model called as ``gae(node_feats, edge_index, edge_attr)``.
        opt: optimiser whose ``zero_grad``/``step`` are called once per batch.
    """
    
    # lr_scheduler.step()

    # simple results dict for now
    res = {'loss': 0, 'counter': 0, 'loss_arr': []}

    for i, rxn_batch in enumerate(test_loaders['r']):
        
        gae.train()
        opt.zero_grad()
    
        # from batch get data info: node_feats, edge_index, edge_attr
        # pass into model and get out ... adj?
        node_feats, edge_index, edge_attr = rxn_batch.x, rxn_batch.edge_index, rxn_batch.edge_attr
        
        # to(device)?
        emb_node_feats = gae(node_feats, edge_index, edge_attr)
        
        # calc loss
        # NOTE(review): placeholder -- `loss` is a plain int, so `.backward()`
        # below raises AttributeError until a real loss is computed from the
        # model output.
        loss = 0 # = bce(adj_pred, adj_gt)
        loss.backward() 
        opt.step()

        # add results to experiment dataclass: loss, epoch,  batch_size
        # temp: use dict
        # NOTE(review): `batch` is not defined in this function; unless a
        # global of that name exists this line raises NameError. The
        # per-batch count was presumably intended -- confirm.
        res['loss'] += loss.item() * batch
    
    # return final loss i.e. return experimentlog final loss
    pass


In [None]:
def train_gae(gae, opt, x, train_pos_edge_index):
    """Take one optimisation step on the reconstruction loss.

    Encodes ``x`` over ``train_pos_edge_index``, backpropagates
    ``gae.recon_loss`` on the same edges, steps ``opt`` and returns the loss
    as a Python float. Prints the input and latent shapes along the way.
    """
    gae.train()
    opt.zero_grad()

    print("train x shape: ", x.shape)
    latent = gae.encode(x, train_pos_edge_index)
    print("train z shape: ", latent.shape)

    recon = gae.recon_loss(latent, train_pos_edge_index)
    recon.backward()
    opt.step()

    return float(recon)

def test_gae(gae, x, train_pos_edge_index, test_pos_edge_index, test_neg_edge_index):
    """Score held-out edges with ``gae.test``.

    Puts ``gae`` in eval mode, encodes ``x`` over the training edges without
    gradient tracking, and returns whatever ``gae.test`` yields for the
    positive and negative test edges.
    """
    gae.eval()
    with torch.no_grad():
        latent = gae.encode(x, train_pos_edge_index)
    scores = gae.test(latent, test_pos_edge_index, test_neg_edge_index)
    return scores

def new_test_gae(gae, x, edge_index):
    """Return the reconstruction loss of ``gae`` on ``(x, edge_index)``.

    This is the same loss used for training, evaluated in eval mode. Both the
    encoding and the loss are computed under ``torch.no_grad()``, so no
    autograd graph is built even when the loss involves learnable parameters.

    Args:
        gae: model exposing ``eval``, ``encode(x, edge_index)`` and
            ``recon_loss(z, edge_index)``.
        x: node features passed to ``encode``.
        edge_index: edges used for both the encoding and the loss.

    Returns:
        Whatever ``gae.recon_loss`` returns, with gradient tracking disabled.
    """
    gae.eval()
    with torch.no_grad():
        print("test x shape: ", x.shape)
        z = gae.encode(x, edge_index)
        print("test z shape: ", z.shape)
        # Evaluation only: keep the loss inside no_grad as well.
        return gae.recon_loss(z, edge_index)

# Start from fresh weights (reset_parameters presumably re-initialises r_ae).
r_ae.reset_parameters()

epochs = 10
log_every = 1  # report after every epoch

for epoch in range(1, epochs + 1):
    # One training step on the reactant graph. The commented-out inner-product
    # decoder score was:
    # value = (z[edge_index[0]] * z[edge_index[1]]).sum(dim = 1)
    loss_train = train_gae(r_ae, r_opt, r_x, r_data.edge_index)
    print("===== Training complete with loss: {:.4f}, now testing ====".format(loss_train))

    # Reconstruction loss on the test graph.
    loss_test = new_test_gae(r_ae, test_x, test_data.edge_index)
    if epoch % log_every != 0:
        continue
    print('===== Epoch: {:03d}, Loss: {:.4f} ===== \n'.format(epoch, loss_test))