### Graph Classification Comparative Study

Experiment 1: Impact of the Graph Attention Mechanism

### Exploring the Dataset

In [56]:
import torch
from torch_geometric.datasets import TUDataset

# Load the PROTEINS dataset through TUDataset, using data/TUDataset as its root directory.
dataset = TUDataset(name='PROTEINS', root='data/TUDataset')

# --- Dataset-wide summary ---------------------------------------------------
print()
print(f'Dataset: {dataset}:')
print('====================')
dataset_summary = (
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
)
print(*dataset_summary, sep='\n')

# --- A closer look at one sample ---------------------------------------------
data = dataset[0]  # the first graph object in the dataset

print()
print(data)
print('=============================================================')

# Structural statistics of that first graph, one per line.
first_graph_summary = (
    f'Number of nodes: {data.num_nodes}',
    f'Number of edges: {data.num_edges}',
    f'Average node degree: {data.num_edges / data.num_nodes:.2f}',
    f'Has isolated nodes: {data.has_isolated_nodes()}',
    f'Has self-loops: {data.has_self_loops()}',
    f'Is undirected: {data.is_undirected()}',
)
print(*first_graph_summary, sep='\n')


Dataset: PROTEINS(1113):
Number of graphs: 1113
Number of features: 3
Number of classes: 2

Data(edge_index=[2, 162], x=[42, 3], y=[1])
Number of nodes: 42
Number of edges: 162
Average node degree: 3.86
Has isolated nodes: False
Has self-loops: False
Is undirected: True


In [57]:
import torch
from torch.utils.tensorboard import SummaryWriter

In [58]:
# Seed torch's global RNG right before shuffling so the split below is repeatable.
# NOTE(review): `dataset` is rebound to its shuffled version, so running this
# cell a second time shuffles the already-shuffled dataset again.
torch.manual_seed(12345)
dataset = dataset.shuffle()

# First 750 graphs -> training, next 250 -> validation, remainder -> test.
train_dataset, val_dataset, test_dataset = (
    dataset[:750],
    dataset[750:1000],
    dataset[1000:],
)

print(
    f'Number of training graphs: {len(train_dataset)}',
    f'Number of validation graphs: {len(val_dataset)}',
    f'Number of test graphs: {len(test_dataset)}',
    sep='\n',
)

Number of training graphs: 750
Number of validation graphs: 250
Number of test graphs: 113


In [59]:
from torch_geometric.loader import DataLoader

# One mini-batch loader per split; the test loader is the only one that keeps
# a fixed iteration order (shuffle=False).
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128)
val_loader = DataLoader(val_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

# Walk the training loader once and describe every batch it yields.
for step, data in enumerate(train_loader):
    print(
        f'Step {step + 1}:',
        '=======',
        f'Number of graphs in the current batch: {data.num_graphs}',
        sep='\n',
    )
    print(data)
    print()
    


Step 1:
Number of graphs in the current batch: 128
DataBatch(edge_index=[2, 18116], x=[4994, 3], y=[128], batch=[4994], ptr=[129])

Step 2:
Number of graphs in the current batch: 128
DataBatch(edge_index=[2, 18058], x=[4902, 3], y=[128], batch=[4902], ptr=[129])

Step 3:
Number of graphs in the current batch: 128
DataBatch(edge_index=[2, 18934], x=[5104, 3], y=[128], batch=[5104], ptr=[129])

Step 4:
Number of graphs in the current batch: 128
DataBatch(edge_index=[2, 18872], x=[5076, 3], y=[128], batch=[5076], ptr=[129])

Step 5:
Number of graphs in the current batch: 128
DataBatch(edge_index=[2, 19228], x=[5140, 3], y=[128], batch=[5140], ptr=[129])

Step 6:
Number of graphs in the current batch: 110
DataBatch(edge_index=[2, 16024], x=[4134, 3], y=[110], batch=[4134], ptr=[111])



In [60]:
from torch.nn import Linear
import torch.nn.functional as F
import torch_geometric.nn as geom_nn
from torch_geometric.nn import global_mean_pool
from torch.utils.tensorboard import SummaryWriter


# Registry of the graph-convolution layer classes compared in this notebook,
# keyed by the short name that GNetwork receives as `layer_name`.
gnn_layer_by_name = dict(
    GCN=geom_nn.GCNConv,
    GAT=geom_nn.GATConv,
    GraphConv=geom_nn.GraphConv,
)


class GNetwork(torch.nn.Module):
    """Graph classifier: three graph-convolution layers, mean pooling, linear head.

    The convolution type is selected by name from ``gnn_layer_by_name`` so the
    same architecture can be trained with different message-passing layers.

    Args:
        hidden_channels: Output width of every convolution layer (and input
            width of the final linear layer).
        layer_name: Key into ``gnn_layer_by_name`` choosing the convolution
            class. Defaults to ``"GCN"``.
        in_channels: Size of the input node features. When ``None`` (default)
            it is read from the notebook-level ``dataset.num_node_features``,
            which is what the class always did before this parameter existed.
        num_classes: Number of output logits. When ``None`` (default) it is
            read from the notebook-level ``dataset.num_classes``.

    Raises:
        KeyError: If ``layer_name`` is not a key of ``gnn_layer_by_name``.
    """

    def __init__(self, hidden_channels, layer_name="GCN", in_channels=None, num_classes=None):
        super().__init__()

        try:
            gnn_layer = gnn_layer_by_name[layer_name]
        except KeyError:
            # Same exception type as a plain dict lookup, but with a usable message.
            raise KeyError(
                f"Unknown layer_name {layer_name!r}; "
                f"expected one of {sorted(gnn_layer_by_name)}"
            ) from None

        # Fall back to the notebook-global dataset only when the caller did not
        # state the sizes explicitly; passing them makes the class usable
        # without that hidden dependency.
        if in_channels is None:
            in_channels = dataset.num_node_features
        if num_classes is None:
            num_classes = dataset.num_classes

        # Re-seed torch's global RNG before any layer is created, so every
        # instance is built from the same RNG state.
        torch.manual_seed(12345)
        self.conv1 = gnn_layer(in_channels, hidden_channels)
        self.conv2 = gnn_layer(hidden_channels, hidden_channels)
        self.conv3 = gnn_layer(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        """Compute one row of class logits per graph in the batch.

        Args:
            x: Node features, passed to the first convolution layer.
            edge_index: Edge index, passed to every convolution layer.
            batch: Node-to-graph assignment, passed to ``global_mean_pool``.

        Returns:
            Output of the final linear layer applied to the pooled embeddings.
        """
        # 1. Obtain node embeddings (no non-linearity after the last convolution).
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer: average node embeddings per graph.
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier; dropout is only active in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)

        return x

# Build one GAT-based network and print its layer layout as a quick sanity check.
model = GNetwork(32, layer_name='GAT')
print(model)

GNetwork(
  (conv1): GATConv(3, 32, heads=1)
  (conv2): GATConv(32, 32, heads=1)
  (conv3): GATConv(32, 32, heads=1)
  (lin): Linear(in_features=32, out_features=2, bias=True)
)


In [63]:
target_model = ["GAT", "GraphConv", "GCN"]
all_result = []


def train():
    """Run one optimisation pass over ``train_loader``.

    Operates on the notebook-level ``model``, ``criterion`` and ``optimizer``,
    which the comparison loop below rebinds for each architecture.
    """
    model.train()
    for data in train_loader:  # Iterate in batches over the training dataset.
        optimizer.zero_grad()  # Clear gradients left over from the previous step.
        out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
        loss = criterion(out, data.y)  # Compute the loss.
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.


def test(loader):
    """Return the accuracy of the notebook-level ``model`` on ``loader``.

    Args:
        loader: Batch loader to evaluate on; its ``dataset`` length is the
            denominator of the returned ratio.

    Returns:
        Number of correctly classified graphs divided by ``len(loader.dataset)``.
    """
    model.eval()
    correct = 0
    # Evaluation needs no gradients; skipping autograd bookkeeping saves memory and time.
    with torch.no_grad():
        for data in loader:  # Iterate in batches over the given dataset.
            out = model(data.x, data.edge_index, data.batch)
            pred = out.argmax(dim=1)  # Use the class with the highest score.
            correct += int((pred == data.y).sum())  # Check against ground-truth labels.
    return correct / len(loader.dataset)  # Derive ratio of correct predictions.


for selected_model in target_model:
    model = GNetwork(hidden_channels=32, layer_name=selected_model)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    criterion = torch.nn.CrossEntropyLoss()
    # One TensorBoard run directory per architecture.
    # NOTE(review): 'graphclassiciation' looks like a typo for
    # 'graphclassification'; left unchanged so the log location does not move.
    writer = SummaryWriter('runs/graphclassiciation/'+selected_model)

    try:
        # NOTE(review): range(1, 50) yields epochs 1..49, i.e. 49 passes - confirm
        # whether 50 were intended.
        for epoch in range(1, 50):
            train()
            train_acc = test(train_loader)
            val_acc = test(val_loader)
            # Record the curves so the per-architecture runs actually contain data.
            writer.add_scalar('Accuracy/train', train_acc, epoch)
            writer.add_scalar('Accuracy/val', val_acc, epoch)
            print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Val Acc: {val_acc:.4f}')
    finally:
        # Flush and release this run's event file even if training is interrupted,
        # instead of only flushing whichever writer happened to be created last.
        writer.close()

    print(selected_model)
    # Keep the last-epoch accuracies for the summary, in `target_model` order.
    all_result.append({"train": train_acc, "val": val_acc})


Epoch: 001, Train Acc: 0.5933, Val Acc: 0.6040
Epoch: 002, Train Acc: 0.5947, Val Acc: 0.6040
Epoch: 003, Train Acc: 0.6867, Val Acc: 0.6920
Epoch: 004, Train Acc: 0.6920, Val Acc: 0.6840
Epoch: 005, Train Acc: 0.7067, Val Acc: 0.7080
Epoch: 006, Train Acc: 0.6773, Val Acc: 0.6240
Epoch: 007, Train Acc: 0.6987, Val Acc: 0.7080
Epoch: 008, Train Acc: 0.7293, Val Acc: 0.7120
Epoch: 009, Train Acc: 0.7120, Val Acc: 0.7200
Epoch: 010, Train Acc: 0.7267, Val Acc: 0.7080
Epoch: 011, Train Acc: 0.7160, Val Acc: 0.6760
Epoch: 012, Train Acc: 0.7147, Val Acc: 0.6880
Epoch: 013, Train Acc: 0.7067, Val Acc: 0.7080
Epoch: 014, Train Acc: 0.7240, Val Acc: 0.7080
Epoch: 015, Train Acc: 0.7280, Val Acc: 0.7120
Epoch: 016, Train Acc: 0.7267, Val Acc: 0.7040
Epoch: 017, Train Acc: 0.7080, Val Acc: 0.7200
Epoch: 018, Train Acc: 0.7213, Val Acc: 0.7120
Epoch: 019, Train Acc: 0.7187, Val Acc: 0.7120
Epoch: 020, Train Acc: 0.7253, Val Acc: 0.7040
Epoch: 021, Train Acc: 0.7013, Val Acc: 0.7240
Epoch: 022, T

#### Result Summary

In [68]:
# Print the stored validation accuracy of every trained architecture.
# Labels are taken from `target_model` (the list the results were produced
# from) rather than being retyped, so a reordered or extended list cannot be
# mislabelled. zip() stops at the shorter sequence, so a partially completed
# run reports only the architectures that actually finished.
for position, (model_name, result) in enumerate(zip(target_model, all_result)):
    if position:
        print()  # blank line between consecutive entries
    print(model_name)
    print('Validation Accuracy: ', result['val'])

GAT
Validation Accuracy:  0.652

GraphConv
Validation Accuracy:  0.672

GCN
Validation Accuracy:  0.72


In [46]:
# Close the TensorBoard SummaryWriter currently bound to `writer`.
writer.close()