In [1]:
from dgl.data import CoraGraphDataset

dataset = CoraGraphDataset()
graph = dataset[0]
nlabels = graph.ndata['label']
num_classes = dataset.num_classes
features = graph.ndata['feat']
labels = graph.ndata['label']



Using backend: pytorch


  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.


In [2]:
graph

Graph(num_nodes=2708, num_edges=10556,
      ndata_schemes={'feat': Scheme(shape=(1433,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool)}
      edata_schemes={})

In [2]:
graph.num_nodes()

2708

In [3]:
import gnnlens.GNNLensWriter as GNNLensWriter

In [4]:
writer = GNNLensWriter("subgraph_examples_data")

graph = dataset[0]
nlabels = graph.ndata['label']
num_classes = dataset.num_classes
features = graph.ndata['feat']

writer.add_graph("Cora", graph, nlabels, num_classes, features, calculate_metrics=True)


> Calculating SPD metrics for Cora
Finished: 0.0025801658630371094
> Calculating KFS metrics for Cora
Finished: 0.011400461196899414
> Calculating layout for Cora
Finished: 0.0013010501861572266
Finish calculating metrics for Cora.


In [5]:
import torch
import dgl

def get_identity_features(features):
    num_node = features.shape[0]
    features = torch.eye(num_node)
    return features

def get_identity_graph(g):
    num_nodes = g.num_nodes()
    srcs = [i for i in range(num_nodes)]
    tgts = [i for i in range(num_nodes)]
    new_g = dgl.graph((srcs, tgts))
    return new_g

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GraphConv

# Define a class for GCN
class GCN(nn.Module):
       def __init__(self,
                           in_feats,
                           num_classes,
                           num_layers):
           super(GCN, self).__init__()
           self.layers = nn.ModuleList()
           self.layers.append(GraphConv(in_feats, num_classes))
           for _ in range(num_layers - 1):
                self.layers.append(GraphConv(num_classes, num_classes))

       def forward(self, g, h):
             for layer in self.layers:
                  h = layer(g, h)
             return h

# Define a function to train a GCN with the specified number of layers 
# and return the predictions
def train_gcn(g, num_layers, num_classes, identity_features=False, identity_adj=False):
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    if identity_features:
        features = get_identity_features(features)
    if identity_adj:
        g = get_identity_graph(g)
    
    model = GCN(in_feats=features.shape[1],
                             num_classes=num_classes,
                             num_layers=num_layers)
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
      
    num_epochs = 200
    model.train()
    for _ in range(num_epochs):
        logits = model(g, features)
        loss = loss_func(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
      
    model.eval()
    predictions = model(g, features)
    _, predicted_classes = torch.max(predictions, dim=1)
    confidence = F.softmax(predictions, dim=1)
    return predicted_classes, confidence, model


print("Training GCN with two layers...")
predicted_classes_GCN, output_vector_GCN, model = train_gcn(graph, num_layers=2, num_classes=num_classes)
#print("Training MLP with two layers...")
#predicted_classes_MLP, output_vector_MLP = train_gcn(graph, num_layers=2, num_classes=num_classes, identity_adj=True)

#print("Training GCNWUF with two layers...")
#predicted_classes_GCNWUF, output_vector_GCNWUF = train_gcn(graph, num_layers=2, num_classes=num_classes, identity_features=True)


Training GCN with two layers...


In [7]:
## model_key 0 is main model.
## model_key 1 is main model without structure.
## model_key 2 is main model without features.
writer.add_model("Cora", "GCN", predicted_classes_GCN, output_vector_GCN)
#writer.add_model("Cora", "MLP", predicted_classes_MLP, output_vector_MLP, 1)
#writer.add_model("Cora", "GCNWUF", predicted_classes_GCNWUF, output_vector_GCNWUF, 2)

In [8]:
import torch.nn as nn
import torch
from captum.attr import IntegratedGradients
from dgl.nn import GraphConv
from functools import partial

In [9]:
# Required by IntegratedGradients
h = graph.ndata['feat'].clone().requires_grad_(True)
#model = GCN(h.shape[1], num_classes)
def wrap_up_func(model, g):
    def forward_replace(h):
        return model(g,h)
    return forward_replace
ig = IntegratedGradients(wrap_up_func(model, graph))
# Attribute the predictions for node class 0 to the input features
feat_attr = ig.attribute(h, target=predicted_classes_GCN[0], internal_batch_size=graph.num_nodes(), n_steps=50)


In [10]:
import torch.nn.functional as F

node_weights = feat_attr.abs().sum(dim=1)
node_weights = (node_weights - node_weights.min()) / node_weights.max()

In [11]:
import dgl
import torch

def extract_subgraph(g, node):
    seed_nodes = [node]
    sg = dgl.in_subgraph(g, seed_nodes)
    src, dst = sg.edges()
    seed_nodes = torch.cat([src, dst]).unique()
    sg = dgl.in_subgraph(g, seed_nodes, relabel_nodes=True)
    return sg

graph.ndata['weight'] = node_weights
graph.edata['weight'] = torch.ones(graph.num_edges(),)
first_subgraph = extract_subgraph(graph, 0)
writer.add_subgraph(graph_name='Cora', subgraph_name='IntegratedGradients', node_id=0,
                                  subgraph_nids=first_subgraph.ndata[dgl.NID],
                                  subgraph_eids=first_subgraph.edata[dgl.EID],
                                  subgraph_nweights=first_subgraph.ndata['weight'],
                                  subgraph_eweights=first_subgraph.edata['weight'])
'''
second_subgraph = extract_subgraph(graph, 1)
writer.add_subgraph(graph_name='Cora', subgraph_name='IntegratedGradients', node_id=1,
                                  subgraph_nids=second_subgraph.ndata[dgl.NID],
                                  subgraph_eids=second_subgraph.edata[dgl.EID],
                                  subgraph_nweights=second_subgraph.ndata['weight'],
                                  subgraph_eweights=second_subgraph.edata['weight'])
                                  '''

"\nsecond_subgraph = extract_subgraph(graph, 1)\nwriter.add_subgraph(graph_name='Cora', subgraph_name='IntegratedGradients', node_id=1,\n                                  subgraph_nids=second_subgraph.ndata[dgl.NID],\n                                  subgraph_eids=second_subgraph.edata[dgl.EID],\n                                  subgraph_nweights=second_subgraph.ndata['weight'],\n                                  subgraph_eweights=second_subgraph.edata['weight'])\n                                  "

In [12]:
writer.flush()

In [None]:
# gnnlens --logdir /data2/zhihua/github/GNNLens/server/gnnlenswriter/examples_data