# Graph Convolutional Network by Kipf and Welling

## Imports

In [45]:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

import pickle
import numpy as np

import itertools

import Notebooks.performance as pf

## GNN Definition

In [46]:
class LinearModule(nn.Module):
    """The linear transformation part of the GCN layer"""
    def __init__(self, in_feats, out_feats, activation):
        super(LinearModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation # This is the activation function

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h' : h}

In [47]:
class GCN(nn.Module):
    """A GCN layer"""
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = LinearModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(message_func=fn.copy_src(src='h', out='m'), reduce_func=fn.sum(msg='m', out='h'))
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

In [81]:
class Net(nn.Module):
    def __init__(self, infeats, hidden_size, outfeats):
        super(Net, self).__init__()
        self.gcn1 = GCN(infeats, hidden_size, F.relu)
        self.gcn2 = GCN(hidden_size, hidden_size, F.relu)
        self.gcn3 = GCN(hidden_size, outfeats, F.relu)
        self.dropout = nn.Dropout(0.2)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        #x = self.dropout(x)
        #x = self.gcn2(g, x)
        x = self.gcn3(g, x)
        x = F.log_softmax(x,1)
        return x

## Data Loading

In [49]:
from dgl.data import citation_graph as citegrh
import networkx as nx

data = citegrh.load_cora()
features = th.FloatTensor(data.features)
labels = th.LongTensor(data.labels)
mask = th.BoolTensor(data.train_mask)
g = data.graph

# add self loop
g.remove_edges_from(nx.selfloop_edges(g))
g = DGLGraph(g)
g.add_edges(g.nodes(), g.nodes())

In [50]:
citeseer_data = dgl.data.CitationGraphDataset("citeseer")
citeseer_features = th.FloatTensor(citeseer_data.features)
citeseer_labels = th.LongTensor(citeseer_data.labels)
citeseer_mask = th.BoolTensor(citeseer_data.train_mask)
citeseer_g = citeseer_data.graph

# add self loop
citeseer_g.remove_edges_from(nx.selfloop_edges(citeseer_g))
citeseer_g = DGLGraph(citeseer_g)
citeseer_g.add_edges(citeseer_g.nodes(), citeseer_g.nodes())

  r_inv = np.power(rowsum, -1).flatten()
Finished data loading and preprocessing.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000


## Select Training Set

In [51]:
percentage_train = 0.5

with open("data/cora_permutation1.pickle","rb") as f:
    perm1 = pickle.load(f)
mask = np.zeros(g.number_of_nodes())
mask[perm1[range(int(percentage_train*g.number_of_nodes()))]] = 1
mask = th.BoolTensor(mask)

In [83]:
features = g.in_degrees().float().unsqueeze(1)
citeseer_features = citeseer_g.in_degrees().float().unsqueeze(1)

In [84]:
features=th.cat([features,th.rand(size=(g.number_of_nodes(),1000))],1)

In [90]:
features=th.eye(g.number_of_nodes())
citeseer_features=th.eye(citeseer_g.number_of_nodes())

## Training

In [61]:
loss_function = pf.perm_inv_loss(labels)

In [87]:
import time

net = Net(features.shape[1], 21, len(np.unique(labels)))
#print(net)

optimizer = th.optim.Adam(net.parameters(), lr=1e-2, weight_decay=1e-2)
net.train() # Set to training mode (use dropout)

dur = []
for epoch in range(200):
    if epoch >=3:
        t0 = time.time()

    # Compute loss for test nodes (only for validation, not used by optimizer)
    net.eval()
    prediction = net(g, features)
    train_rand=pf.rand_score(labels[mask].numpy(),np.argmax(prediction[mask].detach().numpy(), axis=1))
    validation_rand=pf.rand_score(labels[1-mask].numpy(),np.argmax(prediction[1-mask].detach().numpy(), axis=1))
    net.train()

    # Compute loss for train nodes
    logits = net(g, features)

    loss = loss_function.approximate_loss(logits,mask,nclasses=6)

    #loss = F.nll_loss(logits[mask], labels[mask])
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >=3:
        dur.append(time.time() - t0)

    print(f"Epoch {epoch:05d} | Loss {loss.item():.4f} | Train.Rand {train_rand:.4f} | Valid.Rand {validation_rand:.4f} | Time(s) {np.mean(dur):.4f}")

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
Epoch 00000 | Loss 2.0910 | Train.Rand -0.0004 | Valid.Rand -0.0008 | Time(s) nan
Epoch 00001 | Loss 1.8714 | Train.Rand 0.1032 | Valid.Rand 0.1133 | Time(s) nan
Epoch 00002 | Loss 1.7139 | Train.Rand 0.3322 | Valid.Rand 0.2596 | Time(s) nan
Epoch 00003 | Loss 2.2637 | Train.Rand 0.4655 | Valid.Rand 0.3533 | Time(s) 0.8882
Epoch 00004 | Loss 2.0510 | Train.Rand 0.4536 | Valid.Rand 0.3572 | Time(s) 0.8562
Epoch 00005 | Loss 1.8507 | Train.Rand 0.4840 | Valid.Rand 0.3582 | Time(s) 0.8365
Epoch 00006 | Loss 1.9922 | Train.Rand 0.4956 | Valid.Rand 0.3703 | Time(s) 0.8568
Epoch 00007 | Loss 1.8644 | Train.Rand 0.4925 | Valid.Rand 0.3654 | Time(s) 0.8659
Epoch 00008 | Loss 1.7276 | Train.Rand 0.4602 | Valid.Rand 0.3299 | Time(s) 0.8497
Epoch 00009 | Loss 1.9767 | Train.Rand 0.4346 | Valid.Rand 0.3058 | Time(s) 0.8509
Epoch 00010 | Loss 1.4297 | Train.Rand 0.4021 | Valid.Rand 0.3054 | Time(s) 0.8437
Epoch 00011 | Loss 1.6690 | Train.Ra

In [56]:
# Visualise predictions
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(g, features).detach()
a = np.transpose(np.vstack([final_prediction[mask].numpy().argmax(axis=1),labels[mask].numpy()]))
a[a[:,0].argsort()][np.random.choice(range(a.shape[0]),size=10)]
# as can be seen, the net predicts other labels, but gets the clusters right :)

array([[5, 4],
       [2, 2],
       [4, 1],
       [0, 0],
       [0, 0],
       [0, 5],
       [4, 1],
       [2, 2],
       [3, 2],
       [2, 2]], dtype=int64)

## Evaluation

In [94]:
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(g, features).detach()
pf.performance_as_df(labels,final_prediction,mask)

Unnamed: 0,All,Train,Test
Mutual Information,0.623731,0.738277,0.536313
Rand-Index,0.620559,0.738687,0.515168
Variation of Information,1.33686,0.915272,1.653684


In [92]:
net.eval() # Set net to evaluation mode (deactivates dropout)
final_prediction = net(citeseer_g, citeseer_features).detach()
pf.performance_as_df(citeseer_labels,final_prediction,mask)

DGLError: Expect number of features to match number of nodes (len(u)). Got 2708 and 3327 instead.