In [156]:
import time
import argparse
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
import math
import torch

from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConv(Module):
    """
    Single graph-convolution layer in the spirit of https://arxiv.org/abs/1609.02907.

    The forward pass computes ``adj @ (input @ weight)`` and, when a bias
    is present, adds it to every output row.

    Args:
        in_features: size of each input node feature vector.
        out_features: size of each output node feature vector.
        bias: if truthy (default), learn an additive bias of length
            ``out_features``; otherwise ``self.bias`` is ``None``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super().__init__()
        self.in_features, self.out_features = in_features, out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        # Registering None keeps ``self.bias`` readable on bias-free layers.
        self.register_parameter(
            'bias', Parameter(torch.FloatTensor(out_features)) if bias else None
        )
        self.reset_parameters()

    def reset_parameters(self):
        """Fill weight (and bias, if any) uniformly within +/- 1/sqrt(weight.size(1))."""
        bound = 1. / math.sqrt(self.weight.size(1))
        # Weight first, then bias, so the random draws happen in a fixed order.
        for param in (self.weight, self.bias):
            if param is not None:
                param.data.uniform_(-bound, bound)

    def forward(self, input, adj):
        """
        Apply the layer.

        Args:
            input: 2-D tensor of node features (``torch.mm`` requires 2-D).
            adj: adjacency matrix multiplied onto the projected features.

        Returns:
            ``adj @ (input @ weight)``, plus the bias when one is set.
        """
        projected = torch.mm(input, self.weight)
        aggregated = torch.spmm(adj, projected)
        return aggregated if self.bias is None else aggregated + self.bias


In [157]:
import torch.nn as nn
import torch.nn.functional as F


class GCN(nn.Module):
    """
    Two graph-convolution layers followed by a linear classifier.

    Args:
        nfeat: number of input features per node.
        hid: output width of both graph-convolution layers.
        nclass: number of output classes.
        dropout: dropout probability applied after the first layer.
    """

    def __init__(self, nfeat, hid, nclass, dropout):
        super().__init__()
        self.dropout = dropout
        self.gconv1 = GraphConv(nfeat, hid)
        self.gconv2 = GraphConv(hid, hid)
        self.fc = nn.Linear(hid, nclass)

    def forward(self, x, adj):
        """Return log-softmax scores (taken over dim 1) for node features ``x`` and adjacency ``adj``."""
        hidden = F.relu(self.gconv1(x, adj))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        hidden = self.gconv2(hidden, adj)
        scores = self.fc(hidden)
        return F.log_softmax(scores, dim=1)

In [158]:
# Toy graph with a leading batch axis: 4 nodes carrying 2 features each.
node_feats = torch.arange(8, dtype=torch.float32).reshape(1, 4, 2)

# Dense 0/1 adjacency for the 4 nodes, given the same leading batch axis.
adj_matrix = torch.Tensor([
    [1, 1, 0, 0],
    [1, 1, 1, 1],
    [0, 1, 1, 1],
    [0, 1, 1, 1],
]).unsqueeze(0)

print("Node features:\n", node_feats)
print("\nAdjacency matrix:\n", adj_matrix)

Node features:
 tensor([[[0., 1.],
         [2., 3.],
         [4., 5.],
         [6., 7.]]])

Adjacency matrix:
 tensor([[[1., 1., 0., 0.],
         [1., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]]])


In [159]:
import numpy as np
# Fixed seed for this demo. The cell originally read ``args.seed``, but no
# ``args`` object is ever created in the notebook, so it failed on a fresh kernel.
SEED = 42
np.random.seed(SEED)
# Parameter initialisation and dropout draw from torch's generator, not
# NumPy's, so torch has to be seeded as well for repeatable output.
torch.manual_seed(SEED)

model = GCN(nfeat=2,
            hid=16,
            nclass=2,
            dropout=0.5)

# Drop the leading batch axis before calling the model on the toy graph.
model(node_feats.squeeze(0), adj_matrix.squeeze(0))


tensor([[-0.9284, -0.5029],
        [-1.2041, -0.3566],
        [-1.0035, -0.4566],
        [-1.0035, -0.4566]], grad_fn=<LogSoftmaxBackward0>)

# Graph Attention

In [212]:
from dgl import DGLGraph
from dgl.data import citation_graph as citegrh
import networkx as nx

def load_cora_data():
    """
    Load the Cora citation graph together with its training tensors.

    The tensors are read from the graph's node data (the ``'feat'``,
    ``'label'`` and ``'train_mask'`` fields) instead of going through the
    dataset-level attributes, so no extra conversion of already-stored
    tensors is needed.

    Returns:
        tuple: ``(g, features, labels, mask)`` where ``g`` is the graph,
        ``features`` is a float32 tensor of node features, ``labels`` is an
        int64 tensor of class ids and ``mask`` is a boolean tensor taken
        from the graph's ``'train_mask'`` field.
    """
    dataset = citegrh.load_cora()
    g = dataset[0]
    # The casts pin the returned dtypes; they are no-ops when the stored
    # dtypes already match.
    features = g.ndata['feat'].float()
    labels = g.ndata['label'].long()
    mask = g.ndata['train_mask'].bool()
    return g, features, labels, mask


In [388]:
class GATLayer(nn.Module):
    """
    Single-head graph attention layer bound to a fixed graph ``g``.

    Args:
        g: graph the layer operates on; ``forward`` uses its ``ndata``,
            ``apply_edges`` and ``update_all``.
        in_dim: size of each input node feature vector.
        out_dim: size of each output node feature vector.
    """

    def __init__(self, g, in_dim, out_dim):
        super().__init__()
        self.g = g
        # Equation (1): shared linear projection of the node features.
        self.fc = nn.Linear(in_dim, out_dim, bias=False)
        # Equation (2): scores a concatenated (source, destination) pair.
        self.attn_fc = nn.Linear(2 * out_dim, 1, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        relu_gain = nn.init.calculate_gain('relu')
        for linear in (self.fc, self.attn_fc):
            nn.init.xavier_normal_(linear.weight, gain=relu_gain)

    def edge_attention(self, edges):
        """Edge UDF for equation (2): one attention score per edge, stored under 'e'."""
        pair = torch.cat((edges.src['z'], edges.dst['z']), dim=1)
        score = self.attn_fc(pair)
        return {'e': F.leaky_relu(score)}

    def message_func(self, edges):
        """Message UDF for equations (3) & (4): pass on source features and edge scores."""
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        """Reduce UDF for equations (3) & (4): softmax-weighted sum of the mailbox."""
        inbox = nodes.mailbox
        # Equation (3): softmax of the scores along dim 1 of the mailbox.
        alpha = F.softmax(inbox['e'], dim=1)
        # Equation (4): weight the received features and sum along the same dim.
        return {'h': (alpha * inbox['z']).sum(dim=1)}

    def forward(self, h):
        """
        Project ``h``, score every edge, then aggregate.

        Writes ``'z'`` into the graph's node data, and removes ``'h'`` from
        it again when returning the result.
        """
        graph = self.g
        # Equation (1)
        graph.ndata['z'] = self.fc(h)
        # Equation (2)
        graph.apply_edges(self.edge_attention)
        # Equations (3) & (4)
        graph.update_all(self.message_func, self.reduce_func)
        return graph.ndata.pop('h')

In [389]:
# Load the graph plus its feature, label and mask tensors via load_cora_data().
g, features, labels, mask = load_cora_data()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [390]:
# Display the graph object's summary as the cell output.
g

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [391]:
# Single attention head mapping the loaded node features to 7 outputs per node.
net1 = GATLayer(g, in_dim=features.shape[1], out_dim=7)

# Sanity check on the output shape of one forward pass.
net1(features).shape

torch.Size([2708, 7])

In [422]:
class MultiHeadGATLayer(nn.Module):
    """
    Run several independent ``GATLayer`` heads and merge their outputs.

    Args:
        g: graph handed to every head.
        in_dim: size of each input node feature vector.
        out_dim: size of each head's output node feature vector.
        num_heads: number of attention heads.
        merge: ``'cat'`` concatenates the head outputs along dim 1; any
            other value averages them across heads.
    """

    def __init__(self, g, in_dim, out_dim, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList(
            [GATLayer(g, in_dim, out_dim) for _ in range(num_heads)]
        )
        self.merge = merge

    def forward(self, h):
        """
        Apply every head to ``h`` and merge the results.

        Returns:
            For ``merge == 'cat'``, the head outputs concatenated along
            dim 1. Otherwise the element-wise average of the head outputs,
            with the same shape as a single head's output.
        """
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == 'cat':
            # Concatenate on the output feature dimension (dim=1).
            return torch.cat(head_outs, dim=1)
        # Average across heads only (dim=0 of the stack). Without ``dim`` the
        # mean would also collapse nodes and features, returning one scalar.
        return torch.mean(torch.stack(head_outs), dim=0)

In [433]:
class GAT(nn.Module):
    """
    Two-layer graph attention network built from ``MultiHeadGATLayer``.

    Args:
        g: graph passed to both layers.
        in_dim: size of each input node feature vector.
        hidden_dim: per-head output size of the first layer.
        out_dim: output size of the second layer.
        num_heads: number of attention heads in the first layer.
    """

    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super().__init__()
        self.layer1 = MultiHeadGATLayer(g, in_dim, hidden_dim, num_heads)
        # The first layer concatenates its heads, so the second layer's input
        # width is hidden_dim * num_heads. The output layer uses one head.
        concat_dim = hidden_dim * num_heads
        self.layer2 = MultiHeadGATLayer(g, concat_dim, out_dim, 1)

    def forward(self, h):
        """Return ``layer2(ELU(layer1(h)))``."""
        return self.layer2(F.elu(self.layer1(h)))

In [434]:
# Reload the graph and tensors so the cells that follow start from fresh objects.
g, features, labels, mask = load_cora_data()

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.




In [435]:
# Two-layer GAT: 8 heads of width 8 in the first layer, 7 outputs per node.
net2 = GAT(g, in_dim=features.shape[1], hidden_dim=8, out_dim=7, num_heads=8)

In [436]:
# Check the output shape of the ``net2`` model. This cell used to call
# ``net3``, a name that is never defined in the notebook.
net2(features).shape

torch.Size([2708, 7])

In [438]:
import time
import numpy as np

g, features, labels, mask = load_cora_data()

# create the model, 2 heads, each head has hidden size 8
net = GAT(g,
          in_dim=features.size()[1],
          hidden_dim=8,
          out_dim=7,
          num_heads=2)

# create optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)

# main loop
# Per-epoch wall-clock durations; the first 3 epochs are not timed.
dur = []
for epoch in range(50):
    if epoch >= 3:
        t0 = time.time()

    logits = net(features)
    logp = F.log_softmax(logits, 1)
    # Only the rows selected by ``mask`` contribute to the loss.
    loss = F.nll_loss(logp[mask], labels[mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    # np.mean([]) emits RuntimeWarnings before returning nan, so report nan
    # directly while no epoch has been timed yet.
    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
        epoch, loss.item(), np.mean(dur) if dur else float('nan')))

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch 00000 | Loss 1.9464 | Time(s) nan
Epoch 00001 | Loss 1.9444 | Time(s) nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00002 | Loss 1.9425 | Time(s) nan
Epoch 00003 | Loss 1.9405 | Time(s) 0.0844
Epoch 00004 | Loss 1.9386 | Time(s) 0.0814
Epoch 00005 | Loss 1.9366 | Time(s) 0.0809
Epoch 00006 | Loss 1.9346 | Time(s) 0.0800
Epoch 00007 | Loss 1.9327 | Time(s) 0.0789
Epoch 00008 | Loss 1.9307 | Time(s) 0.0789
Epoch 00009 | Loss 1.9287 | Time(s) 0.0797
Epoch 00010 | Loss 1.9267 | Time(s) 0.0793
Epoch 00011 | Loss 1.9247 | Time(s) 0.0795
Epoch 00012 | Loss 1.9228 | Time(s) 0.0795
Epoch 00013 | Loss 1.9208 | Time(s) 0.0790
Epoch 00014 | Loss 1.9187 | Time(s) 0.0788
Epoch 00015 | Loss 1.9167 | Time(s) 0.0787
Epoch 00016 | Loss 1.9147 | Time(s) 0.0785
Epoch 00017 | Loss 1.9127 | Time(s) 0.0785
Epoch 00018 | Loss 1.9106 | Time(s) 0.0785
Epoch 00019 | Loss 1.9086 | Time(s) 0.0782
Epoch 00020 | Loss 1.9065 | Time(s) 0.0782
Epoch 00021 | Loss 1.9044 | Time(s) 0.0783
Epoch 00022 | Loss 1.9023 | Time(s) 0.0782
Epoch 00023 | Loss 1.9002 | Time(s) 0.0784
Epoch 00024 | Loss 1.8981 | Time(s) 0.0784
Epoch 00025 | 