<a href="https://colab.research.google.com/github/achheman/GNN/blob/main/GNN_PyTorch_workshop.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Node Classification using GNN

### Install torch

In [None]:
!pip install -q torch

### Install PyTorch Geometric

In [None]:
import os
import torch
# Expose the installed torch version to the shell so the pip commands below
# can substitute it into the wheel index URL via ${TORCH}.
os.environ['TORCH'] = torch.__version__
# Sets PYTHONWARNINGS=ignore in the environment inherited by the shell commands
# (presumably to quiet warning output during installation).
os.environ['PYTHONWARNINGS'] = "ignore"
# torch-scatter and torch-sparse are looked up (-f) on the index page that
# corresponds to this torch version.
!pip install torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# PyTorch Geometric itself is installed from its GitHub repository (unpinned).
!pip install git+https://github.com/pyg-team/pytorch_geometric.git

### Ingest Planetoid Cora dataset

In [None]:
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Load the Cora dataset from the Planetoid collection, stored under
# data/Planetoid, with the NormalizeFeatures transform attached.
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())

print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

# The graph object has to exist before its labels are inspected below.
data = dataset[0]  # Get the first graph object.

# Look up the class label of a single node as a plain Python int.
node_id = 0
node_class = int(data.y[node_id])
print(f"Class of node {node_id}: {node_class}")

print(data)

    Categories of classes in the graph:
    
    0: "Neural_Networks",
    1: "Rule_Learning",
    2: "Reinforcement_Learning",
    3: "Probabilistic_Methods",
    4: "Theory",
    5: "Genetic_Algorithms",
    6: "Case_Based_Reasoning"

### Defining a Graph Neural Network (GNN)

In [None]:
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network for node classification.

    The first ``GCNConv`` maps the input node features to ``hidden_channels``
    and the second maps that hidden representation to one score per class.
    Input and output widths are read from the notebook-level ``dataset``
    (``dataset.num_features`` and ``dataset.num_classes``).

    Args:
        hidden_channels: Width of the hidden representation between the two
            convolution layers.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        # Seed the RNG before the layers are created so that their initial
        # weights are the same on every run.
        torch.manual_seed(1234567)
        self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        # forward() applies self.conv2 as the output layer, so it must be
        # created here; it produces one output channel per class.
        self.conv2 = GCNConv(hidden_channels, dataset.num_classes)

    def forward(self, x, edge_index):
        """Compute per-node class scores.

        Args:
            x: Node feature matrix passed to the first convolution.
            edge_index: Graph connectivity passed to both convolutions.

        Returns:
            The output of the second convolution (unnormalised class scores).
        """
        x = self.conv1(x, edge_index)
        x = x.relu()
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        return x

# Create model with 16 hidden channels
model = GCN(hidden_channels=16)
print(model)

### Visualizing Untrained GCN network

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

def visualize(h, color):
    """Show a 2-D t-SNE scatter plot of ``h``, coloured by ``color``.

    Args:
        h: Tensor to embed; it is detached, moved to the CPU and converted
            to a NumPy array before being passed to t-SNE.
        color: Per-point values handed to ``plt.scatter`` as ``c`` and mapped
            through the "Set2" colormap.
    """
    z = TSNE(n_components=2).fit_transform(h.detach().cpu().numpy())

    plt.figure(figsize=(10,10))
    plt.xticks([])
    plt.yticks([])

    plt.scatter(z[:, 0], z[:, 1], s=70, c=color, cmap="Set2")
    plt.show()


# These statements run at cell level, not inside visualize(): if they were
# part of the function body, visualize() would call itself without end.
model.eval()

out = model(data.x, data.edge_index)
visualize(out, color=data.y)

### Training a model

In [None]:
# Start from a freshly initialised model, and build the optimiser and the
# loss function that train() uses.
model = GCN(hidden_channels=16)
optimizer = torch.optim.Adam(
    params=model.parameters(),
    lr=0.01,
    weight_decay=5e-4,
)
criterion = torch.nn.CrossEntropyLoss()

def train():
    """Run one optimisation step on the training nodes.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion`` and
    ``data``. The forward pass covers the whole graph, but the loss is
    computed only on the nodes selected by ``data.train_mask``.

    Returns:
        The loss tensor of this step.
    """
    model.train()
    optimizer.zero_grad()
    train_mask = data.train_mask
    logits = model(data.x, data.edge_index)
    loss = criterion(logits[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    return loss

def test():
    """Compute classification accuracy on the test nodes.

    Uses the notebook-level ``model`` and ``data``. The predicted class of a
    node is the index of its highest score; accuracy is the fraction of nodes
    selected by ``data.test_mask`` whose prediction equals ``data.y``.

    Returns:
        Test accuracy as a Python float.
    """
    model.eval()
    # Evaluation needs no gradients, so skip building the autograd graph.
    with torch.no_grad():
        out = model(data.x, data.edge_index)
    pred = out.argmax(dim=1)
    test_mask = data.test_mask
    test_correct = pred[test_mask] == data.y[test_mask]
    test_acc = int(test_correct.sum()) / int(test_mask.sum())
    return test_acc


# Run 100 training steps (epochs 1..100), printing the loss after each one.
for epoch in range(1, 101):
    loss = train()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}')

### Evaluate model on unseen data (Test data)

In [None]:
# Accuracy on the nodes selected by data.test_mask, printed to four decimals.
test_acc = test()
print(f'Test Accuracy: {test_acc:.4f}')

### Visualise the output

In [None]:
# Put the model in evaluation mode (the dropout in GCN.forward is tied to
# self.training), then plot its outputs for all nodes coloured by true class.
model.eval()
out = model(data.x, data.edge_index)
visualize(out, color=data.y)