In [1]:
import networkx as nx
import numpy as np

import pandas as pd

from codes.dataset import *
from codes.model import *

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
data_list = load_protein_dataset()

Loading graph dataset: PROTEINS_full
Graph Nums 1113
Loaded 739 graphs having node sizes in permissible limits


In [3]:
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(data_list, test_size = 0.2)

In [31]:
device = 'cpu'

nodes = [data.num_nodes for data in data_list]
edges = [data.num_edges for data in data_list]
print('Node: max: {}, min: {}, mean: {}'.format(max(nodes), min(nodes), sum(nodes)/len(nodes)))
print('Edge: max: {}, min: {}, mean: {}'.format(max(edges), min(edges), sum(edges)/len(edges)))


# model
num_features = data_list[0].x.shape[1]

input_dim = num_features

OUTPUT_DIM = 256
FEATURE_DIM = 256
HIDDEN_DIM = 256
EPOCHS = 20
LEARNING_RATE = 0.01

Node: max: 620, min: 20, mean: 52.84979702300406
Edge: max: 2098, min: 46, mean: 197.32070365358592


In [32]:
model = PairwiseModel(input_dim=input_dim, feature_dim=FEATURE_DIM, hidden_dim=HIDDEN_DIM, model_type="GraphSAGE",
                      aggregator = "max", output_dim=OUTPUT_DIM)

optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [33]:
loss_func = nn.BCEWithLogitsLoss()

train_loss = []
test_loss = []

train_acc = []
test_acc = []

for epoch in range(EPOCHS):
    train_loss_temp = []
    test_loss_temp = []

    train_acc_temp = []
    test_acc_temp = []
    
    model.train()
    shuffle(train_data)

    for i, data in enumerate(train_data):
        
        pred = model(data)
        label = torch.from_numpy(data.pair_nodes_class).float()

        optimizer.zero_grad()

        loss = loss_func(pred, label)

        # update
        loss.backward()
        optimizer.step()
        
        accuracy = np.mean(label.numpy() == (pred.detach().numpy() > 0.5))
        
        print("Epoch {}/{}, Iteration {}/{}, Loss {}, Accuracy {}".format(epoch + 1, EPOCHS, i + 1, len(train_data),
                                                                         round(loss.item(), 3), round(accuracy, 3)),
             end = "\r", flush = True)
        train_loss_temp.append(loss.item())
        train_acc_temp.append(accuracy)
    model.eval()
    for i, data in enumerate(test_data):
        
        pred = model(data)
        label = torch.from_numpy(data.pair_nodes_class).float()
        
        loss = loss_func(pred, label)
        
        accuracy = np.mean(label.numpy() == (pred.detach().numpy() > 0.5))
        
        test_loss_temp.append(loss.item())
        test_acc_temp.append(accuracy)
        
    
    train_loss.append(np.mean(train_loss_temp))
    test_loss.append(np.mean(test_loss_temp))

    train_acc.append(np.mean(train_acc_temp))
    test_acc.append(np.mean(test_acc_temp))
    
    if ((epoch + 1) % (EPOCHS // 10) == 0):
        print("Epoch {}/{}, Train Loss {}, Test Loss {}, Train Accuracy {}, Test Accuracy {}".format(epoch + 1, EPOCHS, round(np.mean(train_loss_temp), 3), round(np.mean(test_loss_temp), 3), 
                                                                                                     round(np.mean(train_acc_temp), 3), round(np.mean(test_acc_temp), 3)))

Epoch 1/20, Iteration 12/591, Loss 0.71, Accuracy 0.5664



Epoch 2/20, Train Loss 0.698, Test Loss 0.697, Train Accuracy 0.545, Test Accuracy 0.556
Epoch 4/20, Train Loss 0.695, Test Loss 0.692, Train Accuracy 0.539, Test Accuracy 0.551
Epoch 6/20, Train Loss 0.693, Test Loss 0.687, Train Accuracy 0.533, Test Accuracy 0.534
Epoch 8/20, Train Loss 0.693, Test Loss 0.687, Train Accuracy 0.533, Test Accuracy 0.531
Epoch 10/20, Train Loss 0.694, Test Loss 0.688, Train Accuracy 0.536, Test Accuracy 0.529
Epoch 12/20, Train Loss 0.692, Test Loss 0.687, Train Accuracy 0.529, Test Accuracy 0.528
Epoch 14/20, Train Loss 0.692, Test Loss 0.687, Train Accuracy 0.527, Test Accuracy 0.528
Epoch 16/20, Train Loss 0.692, Test Loss 0.686, Train Accuracy 0.531, Test Accuracy 0.526
Epoch 18/20, Train Loss 0.692, Test Loss 0.687, Train Accuracy 0.529, Test Accuracy 0.524
Epoch 20/20, Train Loss 0.692, Test Loss 0.687, Train Accuracy 0.526, Test Accuracy 0.524


In [36]:
from sklearn.metrics import roc_auc_score as roc_auc

def calculate_roc(model, data):
    preds = None
    labels = None
    
    for d in data:
        pred = model(d).detach().numpy().flatten()
        label = torch.from_numpy(d.pair_nodes_class).numpy().flatten()

        if preds is None:
            preds = pred
            labels = label
        else:
            preds = np.concatenate([preds, pred], axis = None)
            labels = np.concatenate([labels, label], axis= None)

    return roc_auc(label, pred)

In [37]:
calculate_roc(model, test_data)



0.517320222007722