In [1]:
import networkx as nx
import numpy as np

import pandas as pd

from codes.dataset import *
from codes.model import *

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
import torch_geometric as tg

train_data = tg.datasets.PPI(root=".", split = "train")
val_data = tg.datasets.PPI(root=".", split = "val")
test_data = tg.datasets.PPI(root=".", split = "test")

In [3]:
train_data

PPI(20)

In [4]:
test_data

PPI(2)

In [5]:
val_data

PPI(2)

In [6]:
train_data[0]

Data(edge_index=[2, 32318], x=[1767, 50], y=[1767, 121])

In [17]:
device = 'cpu'

nodes = [data.num_nodes for data in train_data]
edges = [data.num_edges for data in train_data]

print('Node: max: {}, min: {}, mean: {}'.format(max(nodes), min(nodes), sum(nodes)/len(nodes)))
print('Edge: max: {}, min: {}, mean: {}'.format(max(edges), min(edges), sum(edges)/len(edges)))


# model
num_features = train_data[0].x.shape[1]

input_dim = num_features

OUTPUT_DIM = 121
FEATURE_DIM = 256
HIDDEN_DIM = 256
EPOCHS = 100
LEARNING_RATE = 0.0001

Node: max: 3480, min: 591, mean: 2245.3
Edge: max: 106754, min: 7708, mean: 61318.4


In [18]:
model = MulticlassModel(input_dim=input_dim, feature_dim=FEATURE_DIM, feature_pre=False, hidden_dim=HIDDEN_DIM, model_type="GraphSAGE", aggregator = "max", output_dim=OUTPUT_DIM)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [19]:
loss_func = nn.BCEWithLogitsLoss()

train_loss = []
test_loss = []

train_acc = []
test_acc = []

for epoch in range(EPOCHS):
    train_loss_temp = []
    test_loss_temp = []

    train_acc_temp = []
    test_acc_temp = []
    
    model.train()

    for i, data in enumerate(train_data):
        
        pred = model(data)
        label = data.y

        optimizer.zero_grad()

        loss = loss_func(pred, label)

        # update
        loss.backward()
        optimizer.step()
        
        accuracy = np.mean(label.numpy() == (pred.detach().numpy() > 0.5))
        
        print("Epoch {}/{}, Iteration {}/{}, Loss {}, Accuracy {}".format(epoch + 1, EPOCHS, i + 1, len(train_data),
                                                                         round(loss.item(), 3), round(accuracy, 3)),
             end = "\r", flush = True)
        train_loss_temp.append(loss.item())
        train_acc_temp.append(accuracy)
    model.eval()
    for i, data in enumerate(test_data):
        
        pred = model(data)
        label = data.y
        
        loss = loss_func(pred, label)
        
        accuracy = np.mean(label.numpy() == (pred.detach().numpy() > 0.5))
        
        test_loss_temp.append(loss.item())
        test_acc_temp.append(accuracy)
        
    
    train_loss.append(np.mean(train_loss_temp))
    test_loss.append(np.mean(test_loss_temp))

    train_acc.append(np.mean(train_acc_temp))
    test_acc.append(np.mean(test_acc_temp))
    
    if ((epoch + 1) % (EPOCHS // 10) == 0):
        print("Epoch {}/{}, Train Loss {}, Test Loss {}, Train Accuracy {}, Test Accuracy {}".format(epoch + 1, EPOCHS, round(np.mean(train_loss_temp), 3), round(np.mean(test_loss_temp), 3), 
                                                                                                     round(np.mean(train_acc_temp), 3), round(np.mean(test_acc_temp), 3)))

Epoch 10/100, Train Loss 0.809, Test Loss 0.816, Train Accuracy 0.728, Test Accuracy 0.74
Epoch 20/100, Train Loss 0.809, Test Loss 0.816, Train Accuracy 0.729, Test Accuracy 0.741
Epoch 30/100, Train Loss 0.809, Test Loss 0.816, Train Accuracy 0.731, Test Accuracy 0.743
Epoch 40/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.735, Test Accuracy 0.749
Epoch 50/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.741, Test Accuracy 0.756
Epoch 60/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.748, Test Accuracy 0.762
Epoch 70/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.754, Test Accuracy 0.768
Epoch 80/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.76, Test Accuracy 0.778
Epoch 90/100, Train Loss 0.809, Test Loss 0.815, Train Accuracy 0.765, Test Accuracy 0.784
Epoch 100/100, Train Loss 0.808, Test Loss 0.815, Train Accuracy 0.769, Test Accuracy 0.787


In [20]:
from sklearn.metrics import roc_auc_score as roc_auc

def calculate_roc(model, data):
    preds = None
    labels = None
    
    for d in data:
        pred = model(d).detach().numpy().flatten()
        label = d.y.numpy().flatten()

        if preds is None:
            preds = pred
            labels = label
        else:
            preds = np.concatenate([preds, pred], axis = None)
            labels = np.concatenate([labels, label], axis= None)

    return roc_auc(label, pred)

In [21]:
calculate_roc(model, val_data)

0.8126529451404927