### Import necessary libraries

In [1]:
import os
import pickle
import sys
import timeit

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import networkx as nx

from sklearn.metrics import accuracy_score, precision_score, recall_score

### Check if GPU is available

In [2]:
# Run on the GPU when PyTorch can see a CUDA device; otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Helper functions

In [3]:
def create_adjacency(g):
    """Return the row-normalised dense adjacency matrix of graph ``g``.

    Every row of the adjacency matrix is divided by its L1 norm (the sum of
    the absolute values in that row). Rows that contain no edges are left as
    all zeros instead of turning into NaN.

    Parameters
    ----------
    g : networkx graph
        Graph to convert to a dense matrix.

    Returns
    -------
    numpy.ndarray
        Square array with one row/column per node of ``g``.
    """
    # `nx.to_numpy_matrix` was removed in NetworkX 3.0; `to_numpy_array`
    # yields the same entries as a plain ndarray.
    A = nx.to_numpy_array(g)

    row_norms = np.abs(A).sum(axis=1, keepdims=True)
    # A node without edges has a zero row norm; dividing by 1 keeps its row
    # at zero rather than producing 0/0 = NaN that would poison the loss.
    row_norms[row_norms == 0] = 1.0

    A_hat = A / row_norms
    return A_hat

def load_tensor(file_name, dtype):
    """Load ``file_name + '.npy'`` and move each entry to ``device``.

    Parameters
    ----------
    file_name : str
        Path of the array file without the ``.npy`` suffix.
    dtype : callable
        Called on every loaded entry; the result must provide ``.to(device)``
        (presumably a tensor constructor such as ``torch.FloatTensor`` —
        confirm against callers).

    Returns
    -------
    list
        One converted object per entry of the loaded array.
    """
    # NOTE(review): allow_pickle=True unpickles arbitrary objects, which can
    # execute code; only load files from a trusted source.
    entries = np.load(file_name + '.npy', allow_pickle=True)

    converted = []
    for entry in entries:
        converted.append(dtype(entry).to(device))
    return converted

### Define a Neural Network class

In [4]:
class MyGCN(nn.Module):
    """Graph convolutional network that classifies a whole graph into 2 classes.

    The network applies ``layer`` rounds of "linear + ReLU, then multiply by
    the normalised adjacency matrix", sums the node embeddings into a single
    graph vector and maps it to two logits.

    Parameters
    ----------
    hidden_dim : int, optional
        Width of every GCN layer. Defaults to the notebook-level ``dim``.
    num_layers : int, optional
        Number of GCN layers. Defaults to the notebook-level ``layer``.

    Notes
    -----
    ``__call__`` is overridden, so ``model(index)`` loads a graph from disk
    and returns a loss or a prediction; ``nn.Module`` hooks are therefore not
    run. This mirrors how ``Trainer`` and ``Tester`` use the model.
    """

    def __init__(self, hidden_dim=None, num_layers=None):
        super(MyGCN, self).__init__()
        # Capture the sizes once so later edits to the notebook globals cannot
        # desynchronise `forward` from the layers that were actually built.
        self.dim = int(dim if hidden_dim is None else hidden_dim)
        self.layer = int(layer if num_layers is None else num_layers)

        self.W_gcn = nn.ModuleList([nn.Linear(self.dim, self.dim) for _ in range(self.layer)])
        self.W_property = nn.Linear(self.dim, 2)

    def update(self, xs, adjacency, i):
        """Apply GCN layer ``i``: ``adjacency @ relu(W_i(xs))``."""
        hs = torch.relu(self.W_gcn[i](xs))
        return torch.matmul(adjacency, hs)

    def forward(self, g):
        """Return the ``(1, 2)`` class logits for the networkx graph ``g``."""
        # Use the device the parameters live on, so the model keeps working
        # wherever it has been moved with `.to(...)`.
        dev = self.W_property.weight.device
        adjacency = torch.as_tensor(np.asarray(create_adjacency(g)),
                                    dtype=torch.float32, device=dev)

        ### Create initial embedding: TODO
        # One-hot node embedding sized from the graph and the layer width
        # (identical to the former hard-coded 1000 x 1000 identity when the
        # graph has 1000 nodes and dim == 1000).
        num_nodes = adjacency.shape[0]
        xs = torch.eye(num_nodes, self.dim, device=dev)

        for i in range(self.layer):
            xs = self.update(xs, adjacency, i)

        # Sum over nodes to get one vector for the whole graph; keep a
        # leading batch dimension of 1 for cross_entropy / softmax.
        xs = torch.unsqueeze(torch.sum(xs, 0), 0)
        z_properties = self.W_property(xs)

        return z_properties

    def __call__(self, index, train=True):
        """Load graph number ``index`` and return a loss or a prediction.

        Parameters
        ----------
        index : number
            Graph id; the file ``graph_<int(index)>.gpickle`` is read from the
            working directory. Ids up to ``num_examples / 2`` get label 0,
            the rest label 1.
        train : bool
            When true, return the cross-entropy loss tensor. Otherwise return
            ``(labels, ts)``: the predicted class (NumPy integer) and the true
            label as a 1-element NumPy array.
        """
        # `nx.read_gpickle` was removed in NetworkX 3.0; it was a thin wrapper
        # around pickle.load. NOTE(review): unpickling executes code, so only
        # load graph files from a trusted source.
        with open('graph_' + str(int(index)) + '.gpickle', 'rb') as f:
            G = pickle.load(f)

        dev = self.W_property.weight.device
        target = 0 if index <= num_examples / 2 else 1
        t_properties = torch.LongTensor([target]).to(dev)

        if train:
            z_properties = self.forward(G)
            loss = F.cross_entropy(z_properties, t_properties)
            return loss

        # Evaluation needs no gradients; skipping autograd saves memory/time.
        with torch.no_grad():
            z_properties = self.forward(G)
            zs = torch.softmax(z_properties, 1).detach().cpu().numpy()
        ts     = t_properties.detach().cpu().numpy()
        labels = np.argmax(zs)

        return labels, ts

### Create a trainer class

In [5]:
class Trainer(object):
    """Runs one-sample-at-a-time Adam updates on a model.

    Parameters
    ----------
    model : torch.nn.Module
        Calling ``model(sample)`` must return a scalar loss tensor.
    learning_rate : float, optional
        Adam step size. Defaults to the notebook-level ``lr``.
    """

    def __init__(self, model, learning_rate=None):
        self.model = model
        step_size = lr if learning_rate is None else learning_rate
        self.optimizer = optim.Adam(self.model.parameters(), lr=step_size, weight_decay=0.01)

    def train(self, dataset_train):
        """Train for one epoch and return the summed loss as a Python float.

        The samples are visited in a random order drawn from NumPy's global
        RNG. ``dataset_train`` itself is left untouched: a shuffled copy is
        iterated instead of shuffling the caller's array in place.
        An empty dataset performs no updates and returns ``0.0``.
        """
        loss_total = 0.0
        for sample in np.random.permutation(np.asarray(dataset_train)):
            loss = self.model(sample)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            # .item() extracts the scalar without keeping the autograd graph.
            loss_total += loss.item()
        return loss_total

### Create a tester class

In [6]:
class Tester(object):
    """Evaluates a model on a list of samples and logs results to a file.

    Parameters
    ----------
    model : callable
        ``model(sample, train=False)`` must return ``(labels, ts)``: the
        predicted label(s) and the true label(s) for that sample.
    """

    def __init__(self, model):
        self.model = model

    def test(self, dataset_test):
        """Return ``(accuracy, precision, recall)`` over ``dataset_test``.

        The first value is computed with ``accuracy_score``; callers in this
        notebook store it in variables named ``auc_*``, but it is accuracy,
        not the area under the ROC curve.
        """
        predictions, targets = [], []

        # Collect into Python lists: np.append copies the whole array on
        # every call, which makes the loop quadratic in the dataset size.
        for sample in dataset_test:
            labels, ts = self.model(sample, train=False)
            predictions.extend(np.ravel(labels).tolist())
            targets.extend(np.ravel(ts).tolist())

        accuracy  = accuracy_score(targets, predictions)
        precision = precision_score(targets, predictions)
        recall    = recall_score(targets, predictions)

        return accuracy, precision, recall

    def result(self, epoch, time, loss, auc_dev, auc_test, precision, recall, file_name):
        """Append one tab-separated line with the given values to ``file_name``."""
        fields = [str(value) for value in (epoch, time, loss, auc_dev, auc_test, precision, recall)]
        with open(file_name, 'a') as f:
            f.write('\t'.join(fields) + '\n')

### Model Hyperparameters

In [7]:
# Integer-valued settings.
dim            = int(1000)  # width of every nn.Linear layer in the GCN
layer          = int(2)     # number of GCN layers
decay_interval = int(20)    # decay the learning rate every this many epochs
iteration      = int(15)    # number of training epochs
num_examples   = int(200)   # number of graph ids; the first half is labelled 0

# Float-valued settings.
lr             = float(1e-3)  # Adam learning rate
lr_decay       = float(0.75)  # factor applied to the learning rate at each decay

# NOTE(review): the training loop decays when (epoch + 1) % decay_interval == 0,
# so with iteration < decay_interval the decay never triggers.

### Data preparation

In [8]:
def split_dataset(dataset, ratio):
    """Cut ``dataset`` into a leading and a trailing part.

    The leading part holds the first ``int(ratio * len(dataset))`` items and
    the trailing part holds the rest; both are produced by slicing, so the
    order of the items is preserved.
    """
    cut = int(ratio * len(dataset))
    head = dataset[:cut]
    tail = dataset[cut:]
    return head, tail

# Seed NumPy's global RNG so the train/dev/test split is the same on every
# "Restart & Run All" (torch.manual_seed does not affect np.random).
np.random.seed(1234)

# Graph ids are the integers 1..num_examples, visited in random order.
dataset = np.random.permutation(np.arange(1, num_examples + 1))

# 80% train, then the remaining 20% split evenly into dev and test.
dataset_train, dataset_   = split_dataset(dataset, 0.8)
dataset_dev, dataset_test = split_dataset(dataset_, 0.5)

### Create and train model

In [None]:
# Seed both RNGs: torch drives the weight initialisation, NumPy drives the
# per-epoch shuffling of the training samples.
torch.manual_seed(1234)
np.random.seed(1234)

model   = MyGCN().to(device)
trainer = Trainer(model)
tester  = Tester(model)

# NOTE: the "AUC" columns hold the accuracy returned by Tester.test
# (accuracy_score), not the area under the ROC curve. The labels are kept
# unchanged so existing result files stay comparable.
print('Training...')
print('Epoch \t Time(sec) \t Loss_train \t AUC_dev \t AUC_test \t Precision \t Recall')

start = timeit.default_timer()

setting = 'layers_' + str(layer)

# Make sure the results directory exists; open() would otherwise fail on a
# fresh checkout.
os.makedirs('output', exist_ok=True)
file_result = 'output/' + setting + '.txt'
with open(file_result, 'w') as f:
    f.write('Epoch \t Time(sec) \t Loss_train \t AUC_dev \t AUC_test \t Precision_test \t Recall_test\n')

for epoch in range(iteration):
    # Step decay: shrink the learning rate of every parameter group.
    if (epoch+1) % decay_interval == 0:
        for param_group in trainer.optimizer.param_groups:
            param_group['lr'] *= lr_decay

    loss    = trainer.train(dataset_train)
    auc_dev = tester.test(dataset_dev)[0]
    auc_test, precision, recall = tester.test(dataset_test)

    # Elapsed wall-clock time since training started (cumulative, in seconds).
    end  = timeit.default_timer()
    time = end - start

    tester.result(epoch, time, loss, auc_dev, auc_test, precision, recall, file_result)
    print('%d \t %.4f \t %.4f \t %.4f \t %.4f \t %.4f \t %.4f' %(epoch, time, loss, auc_dev, auc_test, precision, recall))

Training...
Epoch 	 Time(sec) 	 Loss_train 	 AUC_dev 	 AUC_test 	 Precision 	 Recall


  'precision', 'predicted', average, warn_for)


0 	 42.0321 	 1047.9239 	 0.4000 	 0.4500 	 0.0000 	 0.0000
1 	 78.8621 	 192.4038 	 0.4000 	 0.4500 	 0.0000 	 0.0000
