# Graph Convolutional Network by Kipf and Welling

## Imports

In [18]:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

import pickle
import copy
import numpy as np

import itertools

import performance as pf

## GNN Definition

In [2]:
class LinearModule(nn.Module):
    """The linear transformation part of the GCN layer"""
    def __init__(self, in_feats, out_feats, activation):
        super(LinearModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation # This is the activation function

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h' : h}

In [3]:
class GCN(nn.Module):
    """A GCN layer"""
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = LinearModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(message_func=fn.copy_src(src='h', out='m'), reduce_func=fn.sum(msg='m', out='h'))
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

In [13]:
class Net(nn.Module):
    def __init__(self, infeats, hidden_size, outfeats):
        super(Net, self).__init__()
        self.gcn1 = GCN(infeats, hidden_size, F.relu)
        self.gcn2 = GCN(hidden_size, hidden_size, F.relu)
        self.gcn3 = GCN(hidden_size, outfeats, F.relu)
        self.dropout = nn.Dropout(0.2)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.dropout(x)
        #x = self.gcn2(g, x)
        x = self.gcn3(g, x)
        x = F.log_softmax(x,1)
        return x

## Data Loading

In [5]:
from dgl.data import citation_graph as citegrh
import networkx as nx

data = citegrh.load_cora()
features = th.FloatTensor(data.features)
labels = th.LongTensor(data.labels)
mask = th.BoolTensor(data.train_mask)
g = data.graph

# add self loop
g.remove_edges_from(nx.selfloop_edges(g))
g = DGLGraph(g)
g.add_edges(g.nodes(), g.nodes())

In [50]:
citeseer_data = dgl.data.CitationGraphDataset("citeseer")
citeseer_features = th.FloatTensor(citeseer_data.features)
citeseer_labels = th.LongTensor(citeseer_data.labels)
citeseer_mask = th.BoolTensor(citeseer_data.train_mask)
citeseer_g = citeseer_data.graph

# add self loop
citeseer_g.remove_edges_from(nx.selfloop_edges(citeseer_g))
citeseer_g = DGLGraph(citeseer_g)
citeseer_g.add_edges(citeseer_g.nodes(), citeseer_g.nodes())

  r_inv = np.power(rowsum, -1).flatten()
Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000


## Select Training, Validation and Test Set

In [14]:
percentage_train = 0.5
percentage_val = 0.1

with open("../data/cora_permutation1.pickle","rb") as f:
    perm1 = pickle.load(f)
mask_train = np.zeros(g.number_of_nodes())
mask_val = np.zeros(g.number_of_nodes())

i_train = int(percentage_train*g.number_of_nodes())
i_val = i_train + int(percentage_val*g.number_of_nodes())
mask_train[perm1[range(0,i_train)]] = 1
mask_val[perm1[range(i_train,i_val)]] = 1
mask_train = th.BoolTensor(mask_train)
mask_val = th.BoolTensor(mask_val)

In [83]:
features = g.in_degrees().float().unsqueeze(1)
citeseer_features = citeseer_g.in_degrees().float().unsqueeze(1)

In [84]:
features=th.cat([features,th.rand(size=(g.number_of_nodes(),1000))],1)

In [90]:
features=th.eye(g.number_of_nodes())
citeseer_features=th.eye(citeseer_g.number_of_nodes())

## Training

In [8]:
loss_function = pf.perm_inv_loss(labels)

In [20]:
import time

net = Net(features.shape[1], 100, len(np.unique(labels)))
#print(net)

optimizer = th.optim.Adam(net.parameters(), lr=1e-2, weight_decay=1e-2)
net.train() # Set to training mode (use dropout)

dur = []
loss_ev = []
current_best = 0 #arbitrarily high
current_best_epoch = 0
current_best_model = None
no_improvement_for = 0

for epoch in range(10000):
    if epoch >=3:
        t0 = time.time()

    # Compute loss for test nodes (only for validation, not used by optimizer)
    net.eval()
    prediction = net(g, features)
    train_rand=pf.rand_score(labels[mask_train].numpy(),np.argmax(prediction[mask_train].detach().numpy(), axis=1))
    validation_rand=pf.rand_score(labels[mask_val].numpy(),np.argmax(prediction[mask_val].detach().numpy(), axis=1))
    if train_rand>current_best:
        current_best = train_rand
        current_best_epoch = epoch
        current_best_model = copy.deepcopy(net)
        no_improvement_for = 0
    else: no_improvement_for += 1
    
    if no_improvement_for>100:
        break
    
    net.train()

    # Compute loss for train nodes
    logits = net(g, features)

    loss = loss_function.approximate_loss(logits,mask_train,nclasses=5)
    loss_ev.append(loss.detach().item())

    #loss = F.nll_loss(logits[mask_train], labels[mask_train])
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >=3:
        dur.append(time.time() - t0)
        print(f"Epoch {epoch:05d} | Loss {loss.item():.4f} | Train.Rand {train_rand:.4f} | Valid.Rand {validation_rand:.4f} | Time(s) {np.mean(dur):.4f}")
    else:
        print(f"Epoch {epoch:05d} | Loss {loss.item():.4f} | Train.Rand {train_rand:.4f} | Valid.Rand {validation_rand:.4f} | Time(s) unknown")

Epoch 00000 | Loss 2.2667 | Train.Rand 0.0270 | Valid.Rand 0.0232 | Time(s) unknown
Epoch 00001 | Loss 2.5272 | Train.Rand 0.0207 | Valid.Rand 0.0093 | Time(s) unknown
Epoch 00002 | Loss 2.6027 | Train.Rand 0.1183 | Valid.Rand 0.0641 | Time(s) unknown
Epoch 00003 | Loss 2.3326 | Train.Rand 0.1997 | Valid.Rand 0.1766 | Time(s) 0.2499
Epoch 00004 | Loss 2.1822 | Train.Rand 0.2313 | Valid.Rand 0.1344 | Time(s) 0.2582
Epoch 00005 | Loss 2.1375 | Train.Rand 0.3024 | Valid.Rand 0.2139 | Time(s) 0.2576
Epoch 00006 | Loss 2.2869 | Train.Rand 0.1705 | Valid.Rand 0.1226 | Time(s) 0.2557
Epoch 00007 | Loss 2.4844 | Train.Rand 0.1552 | Valid.Rand 0.1060 | Time(s) 0.2545
Epoch 00008 | Loss 2.0696 | Train.Rand 0.2594 | Valid.Rand 0.1856 | Time(s) 0.2554
Epoch 00009 | Loss 2.3312 | Train.Rand 0.2998 | Valid.Rand 0.1863 | Time(s) 0.2549
Epoch 00010 | Loss 2.0831 | Train.Rand 0.2782 | Valid.Rand 0.2187 | Time(s) 0.2543
Epoch 00011 | Loss 2.2683 | Train.Rand 0.1160 | Valid.Rand 0.0817 | Time(s) 0.2538
E

Epoch 00098 | Loss 2.2133 | Train.Rand 0.3896 | Valid.Rand 0.3740 | Time(s) 0.2639
Epoch 00099 | Loss 2.2674 | Train.Rand 0.4062 | Valid.Rand 0.3817 | Time(s) 0.2637
Epoch 00100 | Loss 2.2156 | Train.Rand 0.3762 | Valid.Rand 0.3570 | Time(s) 0.2636
Epoch 00101 | Loss 2.1701 | Train.Rand 0.3231 | Valid.Rand 0.2762 | Time(s) 0.2633
Epoch 00102 | Loss 2.1906 | Train.Rand 0.2194 | Valid.Rand 0.1274 | Time(s) 0.2632
Epoch 00103 | Loss 2.3927 | Train.Rand 0.2204 | Valid.Rand 0.1490 | Time(s) 0.2629
Epoch 00104 | Loss 2.3812 | Train.Rand 0.3172 | Valid.Rand 0.2549 | Time(s) 0.2628
Epoch 00105 | Loss 2.4337 | Train.Rand 0.3986 | Valid.Rand 0.3120 | Time(s) 0.2626
Epoch 00106 | Loss 2.2295 | Train.Rand 0.2886 | Valid.Rand 0.2523 | Time(s) 0.2624
Epoch 00107 | Loss 2.3366 | Train.Rand 0.1061 | Valid.Rand 0.0644 | Time(s) 0.2623
Epoch 00108 | Loss 2.3593 | Train.Rand 0.1174 | Valid.Rand 0.0615 | Time(s) 0.2622
Epoch 00109 | Loss 2.2021 | Train.Rand 0.1660 | Valid.Rand 0.0862 | Time(s) 0.2620
Epoc

Epoch 00196 | Loss 2.3974 | Train.Rand 0.0230 | Valid.Rand 0.0110 | Time(s) 0.2681
Epoch 00197 | Loss 2.3534 | Train.Rand 0.0404 | Valid.Rand 0.0249 | Time(s) 0.2681
Epoch 00198 | Loss 2.1655 | Train.Rand 0.0833 | Valid.Rand 0.0604 | Time(s) 0.2682
Epoch 00199 | Loss 2.4626 | Train.Rand 0.0988 | Valid.Rand 0.0904 | Time(s) 0.2684
Epoch 00200 | Loss 2.0706 | Train.Rand 0.1174 | Valid.Rand 0.1207 | Time(s) 0.2686
Epoch 00201 | Loss 2.2124 | Train.Rand 0.1203 | Valid.Rand 0.1207 | Time(s) 0.2689
Epoch 00202 | Loss 2.3218 | Train.Rand 0.1207 | Valid.Rand 0.1213 | Time(s) 0.2690
Epoch 00203 | Loss 2.2857 | Train.Rand 0.1263 | Valid.Rand 0.1244 | Time(s) 0.2690
Epoch 00204 | Loss 2.3093 | Train.Rand 0.1311 | Valid.Rand 0.1255 | Time(s) 0.2691
Epoch 00205 | Loss 2.4075 | Train.Rand 0.1413 | Valid.Rand 0.1248 | Time(s) 0.2692
Epoch 00206 | Loss 2.3626 | Train.Rand 0.1319 | Valid.Rand 0.1227 | Time(s) 0.2693
Epoch 00207 | Loss 2.3573 | Train.Rand 0.1322 | Valid.Rand 0.1039 | Time(s) 0.2693
Epoc

In [56]:
# Visualise predictions
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(g, features).detach()
a = np.transpose(np.vstack([final_prediction[mask].numpy().argmax(axis=1),labels[mask].numpy()]))
a[a[:,0].argsort()][np.random.choice(range(a.shape[0]),size=10)]
# as can be seen, the net predicts other labels, but gets the clusters right :)

array([[5, 4],
       [2, 2],
       [4, 1],
       [0, 0],
       [0, 0],
       [0, 5],
       [4, 1],
       [2, 2],
       [3, 2],
       [2, 2]], dtype=int64)

## Evaluation

In [21]:
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(g, features).detach()
pf.performance_as_df(labels,final_prediction,mask)

Unnamed: 0,All,Train,Test
Rand-Index,0.159913,0.188761,0.157953
Mutual Information,0.3526,0.371837,0.349843
Variation of Information,1.676176,1.547768,1.679533


In [22]:
current_best_model.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = current_best_model(g, features).detach()
pf.performance_as_df(labels,final_prediction,mask)

Unnamed: 0,All,Train,Test
Rand-Index,0.412778,0.392583,0.414029
Mutual Information,0.410194,0.401718,0.410272
Variation of Information,1.833678,1.774588,1.830229


In [92]:
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(citeseer_g, citeseer_features).detach()
pf.performance_as_df(citeseer_labels,final_prediction,mask)

DGLError: Expect number of features to match number of nodes (len(u)). Got 2708 and 3327 instead.