# GCN example code for Animesh
### Explicitly set the PyTorch backend

In [1]:
import sys, os
# Select PyTorch as the DGL backend through the DGLBACKEND environment variable.
# NOTE(review): presumably this has to be set before `dgl` is first imported -- confirm.
os.environ["DGLBACKEND"] = "pytorch"

### GCN Class definition

In [2]:
"""GCN using DGL nn package
References:
- Semi-Supervised Classification with Graph Convolutional Networks
- Paper: https://arxiv.org/abs/1609.02907
- Code: https://github.com/tkipf/gcn
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv

class GCN(nn.Module):
    """Stack of DGL ``GraphConv`` layers operating on a fixed graph.

    The stack consists of one input layer (``in_feats -> n_hidden``),
    ``n_layers - 1`` hidden layers (``n_hidden -> n_hidden``) and one output
    layer (``n_hidden -> n_classes``). The input and hidden layers are built
    with ``activation``; the output layer is built without one.

    Args:
        g: graph handed to every layer on each forward pass.
        in_feats: size of the input features.
        n_hidden: size of the hidden representations.
        n_classes: size of the output (one value per class).
        n_layers: number of layers in front of the output layer.
        activation: activation passed to the input and hidden layers.
        dropout: dropout probability used between layers.
    """

    def __init__(self,
                 g,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super().__init__()
        self.g = g
        # Consecutive entries are the (input size, output size) of each
        # activated layer. A non-positive repeat count yields an empty list,
        # so the input layer is always present.
        widths = [in_feats, n_hidden] + [n_hidden] * (n_layers - 1)
        convs = [
            GraphConv(fan_in, fan_out, activation=activation)
            for fan_in, fan_out in zip(widths[:-1], widths[1:])
        ]
        # Output layer: no activation is passed.
        convs.append(GraphConv(n_hidden, n_classes))
        self.layers = nn.ModuleList(convs)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        """Run ``features`` through every layer and return the last output.

        Dropout is applied to the input of every layer except the first one.
        """
        first_conv, *later_convs = self.layers
        h = first_conv(self.g, features)
        for conv in later_convs:
            h = conv(self.g, self.dropout(h))
        return h


Using backend: pytorch


### Argument setup

In [3]:
import argparse, time
import numpy as np
from dgl.data import register_data_args, load_data

# Command-line configuration for the GCN example.
# parse_known_args() is used below so that arguments not declared here are
# returned in `unknown` instead of aborting the run.
parser = argparse.ArgumentParser(description='GCN')
#register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0.5,
                    help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
                    help="gpu")
parser.add_argument("--lr", type=float, default=1e-2,
                    help="learning rate")
parser.add_argument("--n-epochs", type=int, default=100,
                    help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
                    help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
                    help="number of hidden gcn layers")
parser.add_argument("--weight-decay", type=float, default=5e-4,
                    help="Weight for L2 loss")
# The fold count is used as the number of cross-validation splits, which has
# to be an integer; parsing it as float turned "--fold 5" into 5.0.
parser.add_argument("--fold", type=int, default=10,
                    help="number of cross-validation folds")
parser.add_argument("--number_edges", type=int, default=10,
                    help="Graph: minimum number of edges per vertex.")
parser.add_argument("--metric", type=str, default='euclidean',
                    help="Graph: similarity measure (between features).")
parser.add_argument("--self-loop", action='store_true',
                    help="graph self-loop (default=False)")
parser.set_defaults(self_loop=False)

args, unknown = parser.parse_known_args()
print(args)

Namespace(dropout=0.5, fold=10, gpu=-1, lr=0.01, metric='euclidean', n_epochs=100, n_hidden=16, n_layers=1, number_edges=10, self_loop=False, weight_decay=0.0005)


### Supporting functions

In [4]:
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to evaluation mode (and left in it) and is run on
    ``features`` with gradient tracking disabled. The predicted class of a row
    is the position of its largest logit.

    Args:
        model: callable module mapping ``features`` to per-row logits.
        features: input passed to ``model``.
        labels: target class per row.
        mask: index selecting the rows to score.

    Returns:
        Fraction of selected rows whose prediction equals the label.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        # The second element of max(dim=1) holds the index of the maximum.
        predicted = selected_logits.max(dim=1)[1]
        n_correct = (predicted == selected_labels).sum().item()
        return n_correct * 1.0 / len(selected_labels)
    
def grid_graph(m, corners=False):
    """Build the adjacency matrix of a nearest-neighbour graph over a grid.

    The grid vertices come from ``graph.grid(m)``; neighbours are found with
    ``graph.distance_sklearn_metrics`` using ``args.number_edges`` neighbours
    and the ``args.metric`` metric, and turned into an adjacency matrix by
    ``graph.adjacency``.

    Args:
        m: grid size passed to ``graph.grid`` (the printed edge estimate
            uses ``m**2`` vertices).
        corners: when true, drop every weight below ``max / 1.5`` and rebuild
            the matrix as a CSR matrix.

    Returns:
        The adjacency matrix.
    """
    coords = graph.grid(m)
    distances, neighbours = graph.distance_sklearn_metrics(
        coords, k=args.number_edges, metric=args.metric)
    A = graph.adjacency(distances, neighbours)

    # Connections are only vertical or horizontal on the grid.
    # Corner vertices are connected to 2 neighbors only.
    if corners:
        import scipy.sparse
        dense = A.toarray()
        cutoff = dense.max()/1.5
        dense[dense < cutoff] = 0
        A = scipy.sparse.csr_matrix(dense)
        print('{} edges'.format(A.nnz))

    # Report the stored edge count next to number_edges * m**2 // 2.
    reference_edges = args.number_edges*m**2//2
    print("{} > {} edges".format(A.nnz//2, reference_edges))
    return A

### Load data

In [5]:
from sklearn.model_selection import StratifiedKFold
from scipy import stats
from dgl import DGLGraph

# Load the feature table: the header row and the first column are skipped and
# columns 1..81 are read. The files are opened in a `with` block so the
# handles are closed as soon as parsing is done.
with open("../100_final_data.csv", "rb") as features_file:
    features = np.loadtxt(features_file, delimiter=",", skiprows=1, usecols=range(1, 82))
# After the transpose each of the 81 read columns becomes one row.
features = features.transpose()
# Standardise along the default axis of stats.zscore.
features = stats.zscore(features)

with open("../all_data.csv", "rb") as labels_file:
    y = np.loadtxt(labels_file, delimiter=",", skiprows=1, usecols=range(1, 82))
# The labels are taken from the first data row.
y = y[0, :]
np_features = features.copy()
bin_ixs = []
train_ixs = []

# n_splits has to be an integer, so the fold count is converted explicitly.
skf = StratifiedKFold(n_splits=int(args.fold))
for train_index, test_index in skf.split(features, y):
    bin_ixs.append(test_index)
    train_ixs.append(train_index)

# NOTE: only the split of the last fold (the loop variables left over from
# the loop above) is used to build the masks below.
features = torch.FloatTensor(features)
y = torch.LongTensor(y)
train_index = torch.from_numpy(train_index)
test_index = torch.from_numpy(test_index)

# set train and test mask
train_mask = np.zeros(y.shape, dtype=bool)
test_mask = np.zeros(y.shape, dtype=bool)
train_mask[train_index] = True
test_mask[test_index] = True
val_mask = test_mask # this is not correct but just make sure we can pass through
train_mask = torch.BoolTensor(train_mask)
test_mask = torch.BoolTensor(test_mask)
val_mask = torch.BoolTensor(val_mask)
in_feats = features.shape[1]
# The number of classes is fixed at two.
n_classes = 2
labels = y

### Construct graph (caveat: this is not a good graph for this data, as it is only meant for image processing)

In [7]:
# Make the parent directory importable so the project-local `lib2` package
# can be found.
sys.path.insert(0, '..')
from lib2 import graph
t_start = time.process_time()
A = grid_graph(9, corners=False)
A = graph.replace_random_edges(A, 0)
#graphs, perm = coarsening.coarsen(A, levels=args.coarsening_levels, self_connections=False)
#L = [graph.laplacian(A, normalized=True) for A in graphs]

# The reported edge count is taken from the adjacency matrix before any
# self-loops are added.
n_edges = A.nnz

# add self loop
# The loops are written into a copy of the adjacency matrix before the DGL
# graph is built: setting the diagonal replaces any existing self-loop and
# leaves exactly one per vertex. Only scipy is needed, and `A` itself is left
# untouched.
graph_adjacency = A
if args.self_loop:
    import scipy.sparse
    with_loops = scipy.sparse.lil_matrix(A)
    with_loops.setdiag(1)
    graph_adjacency = with_loops.tocsr()

g = DGLGraph()
g.from_scipy_sparse_matrix(graph_adjacency)

print("""----Data statistics------'
  #Edges %d
  #Classes %d
  #Train samples %d
  #Val samples %d
  #Test samples %d""" %
      (n_edges, n_classes,
          train_mask.int().sum().item(),
          val_mask.int().sum().item(),
          test_mask.int().sum().item()))


# A negative --gpu value means "run on the CPU".
cuda = args.gpu >= 0
if cuda:
    torch.cuda.set_device(args.gpu)
    features = features.cuda()
    labels = labels.cuda()
    train_mask = train_mask.cuda()
    val_mask = val_mask.cuda()
    test_mask = test_mask.cuda()
    g = g.to(args.gpu)

# Graph normalization
degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
# Vertices with zero in-degree give an infinite value; use 0 for them instead.
norm[torch.isinf(norm)] = 0
if cuda:
    norm = norm.cuda()
g.ndata['norm'] = norm.unsqueeze(1)


470 > 405 edges
----Data statistics------'
  #Edges 940
  #Classes 2
  #Train samples 73
  #Val samples 8
  #Test samples 8


### Let's build the GCN model

In [8]:
model = GCN(g,
            in_feats,
            args.n_hidden,
            n_classes,
            args.n_layers,
            F.relu,
            args.dropout)

if cuda:
    model.cuda()
loss_fcn = torch.nn.CrossEntropyLoss()

# use optimizer
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)

# Training loop. The first three epochs are excluded from the timing
# statistics; `dur` collects the duration of every later epoch.
dur = []
for epoch in range(args.n_epochs):
    model.train()
    if epoch >= 3:
        t0 = time.time()
    # forward
    logits = model(features)
    loss = loss_fcn(logits[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    # evaluate() switches the model to eval mode; model.train() at the top of
    # the loop switches it back for the next epoch.
    acc = evaluate(model, features, labels, val_mask)
    # Until the first timed epoch there is nothing to average. Report NaN
    # directly instead of calling np.mean on an empty list, which prints the
    # same "nan" but also emits runtime warnings.
    mean_dur = np.mean(dur) if dur else float("nan")
    print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
          "ETputs(KTEPS) {:.2f}". format(epoch, mean_dur, loss.item(),
                                         acc, n_edges / mean_dur / 1000))

print()
acc = evaluate(model, features, labels, test_mask)
print("Test accuracy {:.2%}".format(acc))



  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Time(s) nan | Loss 0.7245 | Accuracy 0.3750 | ETputs(KTEPS) nan
Epoch 00001 | Time(s) nan | Loss 0.6749 | Accuracy 0.3750 | ETputs(KTEPS) nan
Epoch 00002 | Time(s) nan | Loss 0.6494 | Accuracy 0.3750 | ETputs(KTEPS) nan
Epoch 00003 | Time(s) 0.0046 | Loss 0.6268 | Accuracy 0.3750 | ETputs(KTEPS) 202.73
Epoch 00004 | Time(s) 0.0051 | Loss 0.6103 | Accuracy 0.3750 | ETputs(KTEPS) 183.46
Epoch 00005 | Time(s) 0.0053 | Loss 0.6030 | Accuracy 0.3750 | ETputs(KTEPS) 177.32
Epoch 00006 | Time(s) 0.0050 | Loss 0.5845 | Accuracy 0.3750 | ETputs(KTEPS) 186.45
Epoch 00007 | Time(s) 0.0049 | Loss 0.5681 | Accuracy 0.3750 | ETputs(KTEPS) 192.91
Epoch 00008 | Time(s) 0.0047 | Loss 0.5751 | Accuracy 0.3750 | ETputs(KTEPS) 199.90
Epoch 00009 | Time(s) 0.0046 | Loss 0.5585 | Accuracy 0.3750 | ETputs(KTEPS) 204.34
Epoch 00010 | Time(s) 0.0047 | Loss 0.5662 | Accuracy 0.3750 | ETputs(KTEPS) 201.45
Epoch 00011 | Time(s) 0.0046 | Loss 0.5587 | Accuracy 0.3750 | ETputs(KTEPS) 202.62
Epoch 0001