In [1]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl.function as fn
from dgl.nn import GATConv
from captum.attr import IntegratedGradients
from dgl.nn import GraphConv
from functools import partial
import torch.nn.functional as F
import dgl


# Define a class for GCN
class GCN(nn.Module):
    """A plain stack of ``GraphConv`` layers applied one after another.

    The first layer maps ``in_feats`` to ``num_classes``; each of the
    remaining ``num_layers - 1`` layers maps ``num_classes`` to
    ``num_classes``. The layers are chained directly, with nothing applied
    between them in ``forward``.

    Args:
        in_feats: width of the input node features.
        num_classes: output width of every layer.
        num_layers: total number of layers; one layer is always created,
            even when this is less than 1.
    """

    def __init__(self, in_feats, num_classes, num_layers):
        super().__init__()
        # Input width of each layer, in construction order.
        input_dims = [in_feats] + [num_classes] * (num_layers - 1)
        self.layers = nn.ModuleList(
            [GraphConv(dim, num_classes) for dim in input_dims]
        )

    def forward(self, g, h):
        """Run ``h`` through every layer on graph ``g`` and return the result."""
        out = h
        for conv in self.layers:
            out = conv(g, out)
        return out
def get_identity_features(features):
    """Return an identity matrix with one row per row of ``features``.

    Only the first dimension of ``features`` is used; its values are
    ignored. The result is created on the same device as ``features`` (when
    it exposes a ``device`` attribute) so it can be used together with
    tensors that already live on that device.

    Args:
        features: array-like with a ``shape`` attribute; ``shape[0]`` gives
            the size of the identity matrix.

    Returns:
        A ``(shape[0], shape[0])`` tensor from ``torch.eye``.
    """
    num_nodes = features.shape[0]
    # Fall back to torch's default device for inputs without a `.device`.
    return torch.eye(num_nodes, device=getattr(features, "device", None))

def get_identity_graph(g):
    """Build a new graph whose only edges are ``i -> i`` for every node of ``g``.

    Only ``g.num_nodes()`` is read from the input; no node or edge data is
    copied onto the returned graph.
    """
    node_ids = list(range(g.num_nodes()))
    # Same ID list as source and destination: one self-loop per node.
    return dgl.graph((node_ids, node_ids))
def train_gcn(g, num_layers, num_classes, identity_features=False, identity_adj=False,
              num_epochs=200, lr=1e-2):
    """Train a ``GCN`` on ``g`` and return its predictions for every node.

    Args:
        g: graph providing ``ndata['feat']``, ``ndata['label']`` and
            ``ndata['train_mask']``.
        num_layers: number of layers passed to ``GCN``.
        num_classes: number of output classes passed to ``GCN``.
        identity_features: if True, replace the node features with the
            output of ``get_identity_features``.
        identity_adj: if True, replace the graph with the output of
            ``get_identity_graph``.
        num_epochs: number of full-graph optimisation steps.
        lr: learning rate for the Adam optimiser.

    Returns:
        ``(predicted_classes, confidence)``: the index of the largest logit
        per node, and the softmax of the logits over dim 1. Both are
        computed without gradient tracking.
    """
    # Read node data before `g` may be swapped for the identity graph, which
    # is built from scratch and carries none of these fields.
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    if identity_features:
        features = get_identity_features(features)
    if identity_adj:
        g = get_identity_graph(g)

    model = GCN(in_feats=features.shape[1],
                num_classes=num_classes,
                num_layers=num_layers)
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(num_epochs):
        logits = model(g, features)
        # The loss is taken only over the nodes selected by the training mask.
        loss = loss_func(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    # Inference needs no autograd graph; this also returns plain tensors
    # that do not require grad.
    with torch.no_grad():
        predictions = model(g, features)
        predicted_classes = predictions.argmax(dim=1)
        confidence = F.softmax(predictions, dim=1)
    return predicted_classes, confidence



class GAT(nn.Module):
    """Multi-layer graph attention network built from ``GATConv`` layers.

    Layers ``0 .. num_layers - 2`` output ``num_hidden`` features per head
    and have their heads concatenated; the last layer outputs
    ``num_classes`` per head and has its heads averaged.

    Args:
        num_layers: total number of ``GATConv`` layers (>= 1).
        in_dim: width of the input node features.
        num_hidden: per-head output width of every non-final layer.
        num_classes: per-head output width of the final layer.
        heads: sequence with one head count per layer.

    Raises:
        ValueError: if ``num_layers < 1`` or ``len(heads) != num_layers``.
    """

    def __init__(self,
                 num_layers,
                 in_dim,
                 num_hidden,
                 num_classes,
                 heads):
        super(GAT, self).__init__()
        if num_layers < 1:
            raise ValueError("num_layers must be at least 1, got %r" % (num_layers,))
        if len(heads) != num_layers:
            # A mismatch would otherwise yield layers with incompatible
            # widths that only fail later, inside forward().
            raise ValueError(
                "heads must contain one entry per layer: expected %d, got %d"
                % (num_layers, len(heads)))
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        # Input and hidden layers. Because head outputs are concatenated,
        # the next layer's input width is num_hidden * heads of this layer.
        layer_in_dim = in_dim
        for l in range(num_layers - 1):
            self.gat_layers.append(GATConv(layer_in_dim, num_hidden, heads[l]))
            layer_in_dim = num_hidden * heads[l]
        # Output projection. With a single layer this consumes the raw input.
        self.gat_layers.append(GATConv(layer_in_dim, num_classes, heads[-1]))

    def forward(self, g, h):
        """Run the network on graph ``g`` with node features ``h``.

        Returns:
            ``(logits, attns)``: the final layer's output averaged over
            dim 1, and a list with the attention returned by each layer, in
            layer order.
        """
        attns = []
        for l in range(self.num_layers - 1):
            h, attn = self.gat_layers[l](g, h, get_attention=True)
            # Merge the per-head outputs into one feature vector per node.
            h = h.flatten(1)
            attns.append(attn)
        # output projection
        logits, attn = self.gat_layers[-1](g, h, get_attention=True)
        logits = logits.mean(1)
        attns.append(attn)
        return logits, attns
    

def train_gat(g, num_layers, heads, num_classes, identity_features=False, identity_adj=False,
              num_hidden=8, num_epochs=35, lr=1e-2):
    """Train a ``GAT`` on ``g`` and return its predictions for every node.

    Args:
        g: graph providing ``ndata['feat']``, ``ndata['label']`` and
            ``ndata['train_mask']``.
        num_layers: number of layers passed to ``GAT``.
        heads: per-layer head counts passed to ``GAT``.
        num_classes: number of output classes passed to ``GAT``.
        identity_features: if True, replace the node features with the
            output of ``get_identity_features``.
        identity_adj: if True, replace the graph with the output of
            ``get_identity_graph``.
        num_hidden: per-head hidden width passed to ``GAT``.
        num_epochs: number of full-graph optimisation steps.
        lr: learning rate for the Adam optimiser.

    Returns:
        ``(predicted_classes, confidence)``: the index of the largest logit
        per node, and the softmax of the logits over dim 1. Both are
        computed without gradient tracking.
    """
    # Read node data before `g` may be swapped for the identity graph, which
    # is built from scratch and carries none of these fields.
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    if identity_features:
        features = get_identity_features(features)
    if identity_adj:
        g = get_identity_graph(g)
    model = GAT(num_layers=num_layers,
                in_dim=features.shape[1],
                num_hidden=num_hidden,
                num_classes=num_classes,
                heads=heads)
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    model.train()
    for _ in range(num_epochs):
        # The attention weights are not needed during training.
        logits, _ = model(g, features)
        loss = loss_func(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    # Inference needs no autograd graph; this also returns plain tensors
    # that do not require grad.
    with torch.no_grad():
        predictions, _ = model(g, features)
        predicted_classes = predictions.argmax(dim=1)
        confidence = F.softmax(predictions, dim=1)
    return predicted_classes, confidence
    

def extract_subgraph(g, node):
    """Extract a relabeled in-edge subgraph of ``g`` around ``node``.

    Two passes of ``dgl.in_subgraph`` are used: the first is seeded with
    ``node`` alone, and the second is seeded with every endpoint of the
    edges found by the first pass.

    Args:
        g: the graph to extract from.
        node: ID of the node to centre the subgraph on.

    Returns:
        The result of the second ``dgl.in_subgraph`` call, made with
        ``relabel_nodes=True``.
    """
    # Pass 1: edges pointing into `node`.
    inbound = dgl.in_subgraph(g, [node])
    edge_src, edge_dst = inbound.edges()
    # Every distinct endpoint of those edges seeds the second pass.
    endpoints = torch.cat([edge_src, edge_dst]).unique()
    # Pass 2: in-edges of all collected endpoints, with compacted node IDs.
    return dgl.in_subgraph(g, endpoints, relabel_nodes=True)

# Single-layer GCN whose forward takes the features before the graph
class GCN2(nn.Module):
    """Wrapper around one ``GraphConv`` layer with a ``forward(h, g)`` signature.

    Args:
        in_feats: width of the input node features.
        num_classes: output width of the layer.
    """

    def __init__(self, in_feats, num_classes):
        super().__init__()
        self.conv = GraphConv(in_feats, num_classes)

    def forward(self, h, g):
        """Apply the layer; arguments arrive as (features, graph)."""
        # The order of g and h is swapped relative to GraphConv's call
        # because of how `partial` is used with this module.
        graph, feats = g, h
        return self.conv(graph, feats)

Using backend: pytorch


In [2]:
from dgl.data import CoraGraphDataset, CiteseerGraphDataset
import torch
import gnnlens.GNNLensWriter as GNNLensWriter


def write_information_for_graph(graph_name, dataset, writer):
    """Register a dataset's graph with ``writer`` and add trained models for it.

    The first graph of ``dataset`` is added via ``writer.add_graph``. Then,
    depending on ``graph_name``:

    * ``"Cora"``: six ``train_gcn`` variants are trained (one and two
      layers; plain, identity adjacency, identity features) and added.
    * ``"Citeseer"``: four ``train_gat`` variants are trained and added.

    Any other ``graph_name`` only registers the graph.

    Args:
        graph_name: name under which the graph and its models are registered.
        dataset: dataset object; ``dataset[0]`` is the graph and
            ``dataset.num_classes`` the number of label classes.
        writer: object exposing ``add_graph`` and ``add_model``.
    """
    print("Write information for", graph_name)
    graph = dataset[0]
    nlabels = graph.ndata['label']
    num_classes = dataset.num_classes
    features = graph.ndata['feat']

    writer.add_graph(graph_name, graph, nlabels, num_classes, features, calculate_metrics=True)

    if graph_name == "Cora":
        # (model name, progress message, extra train_gcn arguments)
        gcn_runs = [
            ("GCN_L2", "Training GCN with two layers...", dict(num_layers=2)),
            ("MLP_L2", "Training MLP with two layers...", dict(num_layers=2, identity_adj=True)),
            ("GCNWUF_L2", "Training GCNWUF with two layers...", dict(num_layers=2, identity_features=True)),
            ("GCN_L1", "Training GCN with one layers...", dict(num_layers=1)),
            ("MLP_L1", "Training MLP with one layers...", dict(num_layers=1, identity_adj=True)),
            ("GCNWUF_L1", "Training GCNWUF with one layers...", dict(num_layers=1, identity_features=True)),
        ]
        # Train everything first, then register, so nothing is added to the
        # writer unless all six trainings succeed.
        trained = []
        for model_name, message, train_kwargs in gcn_runs:
            print(message)
            trained.append(
                (model_name, train_gcn(graph, num_classes=num_classes, **train_kwargs)))

        # Hand the predictions over to the writer
        for model_name, (predicted_classes, output_vector) in trained:
            writer.add_model(graph_name, model_name, predicted_classes, output_vector)

    if graph_name == "Citeseer":
        # (model name, progress message, extra train_gat arguments)
        gat_runs = [
            ("GAT_L2", "Training GAT with two layers...",
             dict(num_layers=2, heads=[2, 1])),
            ("MLP_L2", "Training MLP with two layers...",
             dict(num_layers=2, heads=[2, 1], identity_adj=True)),
            ("GATWUF_L2", "Training GATWUF with two layers...",
             dict(num_layers=2, heads=[2, 1], identity_features=True)),
            ("GAT_L3", "Training GAT with three layers...",
             dict(num_layers=3, heads=[4, 2, 1])),
        ]
        # Each model is registered right after it is trained.
        for model_name, message, train_kwargs in gat_runs:
            print(message)
            predictions, confidence = train_gat(
                graph, num_classes=num_classes, **train_kwargs)
            writer.add_model(graph_name, model_name, predictions, confidence)

        # NOTE: the three-layer MLP (identity_adj=True) and GATWUF
        # (identity_features=True) variants with heads=[4,2,1] are left
        # disabled. The disabled code registered both under "GAT_L3", which
        # would collide with the model above; give them distinct names
        # before re-enabling.
    


# Load both citation datasets up front, then write each one through a single writer.
cora_dataset = CoraGraphDataset()
citeseer_dataset = CiteseerGraphDataset()
writer = GNNLensWriter('stress_test')
for _graph_name, _dataset in (("Cora", cora_dataset), ("Citeseer", citeseer_dataset)):
    write_information_for_graph(_graph_name, _dataset, writer)
writer.flush()
print("Finish..")

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
  NumNodes: 3327
  NumEdges: 9228
  NumFeats: 3703
  NumClasses: 6
  NumTrainingSamples: 120
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Write information for Cora
> Calculating SPD metrics for Cora
Finished: 0.003057718276977539
> Calculating KFS metrics for Cora
Finished: 0.06088137626647949
> Calculating layout for Cora
Finished: 0.0016596317291259766
Finish calculating metrics for Cora.
Training GCN with two layers...
Training MLP with two layers...
Training GCNWUF with two layers...
Training GCN with one layers...
Training MLP with one layers...
Training GCNWUF with one layers...
Write information for Citeseer
> Calculating SPD metrics for Citeseer
Finished: 0.006365537643432617
> Calculating KFS metrics for Citeseer
Finished: 0.011635541915893555
> Calculati