Next steps:
1. Add a "master node" that is connected to every other node, so that message passing can carry information between all nodes

In [1]:
import pandas as pd

from LigandBinaryDataset import LigandBinaryDataset

import time
import yaml
import numpy as np
import torch
from torch_geometric.loader import DataLoader

# Load the run's hyperparameters (e.g. BATCH_SIZE) from the YAML file next to the notebook.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every platform;
# without it, open() falls back to the locale's preferred encoding.
# NOTE(review): FullLoader can construct more than plain scalars/lists/dicts. If the file
# only holds plain values, yaml.safe_load(f) would be the safer choice — confirm and switch.
with open('hyperparameters.yaml', 'r', encoding='utf-8') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

In [None]:
# Build the ligand dataset rooted at './data/'. The output recorded for this cell shows a
# "Processing..." pass over the raw CSV at several seconds per item, so this cell can be slow.
dataset = LigandBinaryDataset('./data/')

Processing...


['data\\raw\\af2_dataset_training_labeled.csv']
['data\\processed\\processed.dataset']


  3%|▎         | 24/723 [02:05<1:00:37,  5.20s/it]

In [None]:
# Seed PyTorch's global RNG before shuffling so that the train/val/test membership is the
# same on every Restart & Run All.
# NOTE(review): assumes dataset.shuffle() draws its permutation from torch's global RNG
# (as PyTorch Geometric datasets do) — confirm for LigandBinaryDataset.
SEED = 42
torch.manual_seed(SEED)

# Cumulative cut points: first 70% -> train, next 15% -> validation, last 15% -> test.
TRAIN_END_FRACTION = 0.7
VAL_END_FRACTION = 0.85

dataset = dataset.shuffle()

# Compute both boundaries once so the three slices are contiguous and cannot overlap.
num_samples = len(dataset)
train_end = int(num_samples * TRAIN_END_FRACTION)
val_end = int(num_samples * VAL_END_FRACTION)

train_dataset = dataset[:train_end]
val_dataset = dataset[train_end:val_end]
test_dataset = dataset[val_end:]

In [None]:
# Mini-batch loaders for the three splits, all using the configured batch size.
# Only the training loader reshuffles every epoch, so SGD does not see the batches in the
# same fixed order each time; validation and test keep a fixed order.
batch_size = config['BATCH_SIZE']
train_dl = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dl = DataLoader(val_dataset, batch_size=batch_size)
test_dl = DataLoader(test_dataset, batch_size=batch_size)

In [None]:
from LigandGNNV1 import LigandGNNV1

# Use the GPU when one is available, otherwise fall back to the CPU so the notebook still
# runs on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): the second constructor argument (1) is presumably the output dimension —
# confirm against LigandGNNV1.
model = LigandGNNV1(dataset.num_node_features, 1).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001)
# BCEWithLogitsLoss applies the sigmoid itself, so the model's outputs are treated as raw
# logits everywhere else in the notebook.
criterion = torch.nn.BCEWithLogitsLoss()

In [None]:
def train(model, loader, criterion, optimizer):
    """Run one optimisation pass over `loader` and return the averaged loss.

    Each batch's loss is weighted by the number of graphs in that batch, and the
    accumulated total is divided by the number of items in `loader.dataset`.

    Args:
        model: network called as `model(batch)`; switched to training mode here.
        loader: iterable of batches exposing `.to()`, `.y` and `.num_graphs`, with a
            sized `.dataset` attribute.
        criterion: loss callable invoked as `criterion(output, target)`.
        optimizer: optimizer whose gradients are cleared and stepped once per batch.

    Returns:
        The graph-weighted average of the per-batch losses, as a float.
    """
    model.train()

    running_loss = 0.
    for batch in loader:
        # `device` is the notebook-level global defined alongside the model.
        batch = batch.to(device)

        optimizer.zero_grad()
        logits = model(batch)
        # Labels are reshaped into a single column before being passed to the loss.
        targets = batch.y.reshape(-1, 1)

        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item() * batch.num_graphs

    return running_loss / len(loader.dataset)

In [None]:
from sklearn.metrics import roc_auc_score

def evaluate(model, loader):
    """Compute the ROC AUC of `model` over every batch in `loader`.

    The model's outputs are treated as raw logits (it is trained with
    BCEWithLogitsLoss), converted to probabilities with a sigmoid, and scored
    against the labels in `data.y`.

    Args:
        model: network called as `model(data)`; switched to eval mode here.
        loader: iterable of batches exposing `.to()` and `.y`.

    Returns:
        The value returned by `roc_auc_score(labels, scores)`.

    Raises:
        ValueError: if `loader` yields no batches, or if `roc_auc_score` rejects
            the labels (for example when only one class is present).
    """
    model.eval()

    # Collect per-batch arrays and concatenate once at the end, instead of
    # re-copying the growing result on every iteration.
    score_chunks = []
    label_chunks = []

    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            # Keep the probabilities continuous: ROC AUC ranks scores, so rounding
            # (especially rounding raw logits) would throw that ranking away.
            # reshape(-1) always yields a 1-D tensor, even for a one-element batch,
            # where squeeze() would produce a 0-d value that cannot be concatenated.
            scores = torch.sigmoid(model(data)).reshape(-1)

            score_chunks.append(scores.cpu().numpy())
            label_chunks.append(data.y.reshape(-1).cpu().numpy())

    preds = np.concatenate(score_chunks)
    labels = np.concatenate(label_chunks)

    return roc_auc_score(labels, preds)

In [None]:
# Train for 200 epochs; after each one, score both the training and validation loaders
# and report the loss, the two scores and the wall-clock time the epoch took.
for epoch in range(1, 201):
    s = time.time()

    loss = train(model, train_dl, criterion, optimizer)
    # Evaluated in the same order as before: training loader first, then validation.
    train_score, val_score = [evaluate(model, split_dl) for split_dl in (train_dl, val_dl)]

    e = time.time()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.05f}, Train Score: {train_score:.05f}, Val Score: {val_score:.05f}, Time: {e - s:.05f}')

In [None]:
# Score the trained model on the held-out test loader; the cell displays the ROC AUC
# returned by evaluate().
evaluate(model, test_dl)

In [None]:
# Forward pass on the first item of the test split, for manual inspection.
# Set eval mode explicitly so this cell does not depend on which cell ran last, and disable
# autograd so no graph is built for a tensor that is only inspected.
model.eval()
with torch.no_grad():
    pred = model(test_dl.dataset[0].to(device))

In [None]:
# Map the raw outputs in `pred` through a sigmoid, round each value to 0 or 1, and sum:
# the displayed result is the number of entries that round to 1.
torch.sigmoid(pred).round().sum()

In [None]:
# Grab the `x` attribute of the first item in the (shuffled) dataset for inspection.
# NOTE(review): presumably the node-feature matrix — confirm against LigandBinaryDataset.
data = dataset[0].x