<a href="https://colab.research.google.com/github/anirudh-chakravarthy/IR-Assignment/blob/main/Assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive inside the Colab VM so the project folder is reachable on the filesystem.
from google.colab import drive
# force_remount=True requests a fresh mount even if /content/gdrive is already mounted.
drive.mount("/content/gdrive", force_remount=True)

In [None]:
# Use the absolute mount point (Drive is mounted at /content/gdrive) so this cell can be
# re-run safely: a relative path only resolves while the working directory is still /content.
%cd /content/gdrive/MyDrive/Academics/IR-Assignment/

In [None]:
# Pin torch / torchvision / torchaudio to fixed versions for this notebook.
!pip install torch==1.7.0 torchvision==0.8.0 torchaudio==0.7.0

In [None]:
# PyTorch Geometric extension packages, resolved against the wheel index for
# torch 1.7.0 + CUDA 10.2 (the `-f` URL), followed by torch-geometric itself.
!pip install torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html
!pip install torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html
!pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.7.0+cu102.html
!pip install torch-geometric

## References
1. https://github.com/KMint1819/SociaMedia/blob/ba2cca9e416461ae9916a0388cccf498b5b3ae57/hw3_gnn/Copy_of_Graph_Neural_Networks.ipynb
2. https://github.com/YingtongDou/GCNN
3. https://chrsmrrs.github.io/datasets/docs/datasets/

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Linear
from torch.optim import Adam
from torch.utils.data import random_split
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset
from torch_geometric.nn import GATConv, global_mean_pool

from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
import pdb

# Hyper-parameters shared by the data-loading and training cells.
BATCH_SIZE = 128   # graphs per mini-batch for both the train and test loaders
LR = 1e-3          # learning rate passed to the Adam optimizer
NUM_EPOCHS = 1000  # number of passes over the training loader
SEED = 42          # seed for the RNGs below and for the train/test split generator

# Seed NumPy and PyTorch (CPU and CUDA) so runs are repeatable.
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)

In [None]:
class GeometricNet(nn.Module):
  """Graph-level classifier built from two GAT convolutions and a two-layer linear head.

  Node features pass through two `GATConv` layers, are pooled per graph with
  `global_mean_pool`, and are mapped to class scores by two linear layers. SELU is
  applied after each convolution, after pooling, and after the first linear layer;
  the final layer's output is returned as raw scores (no softmax).

  Args:
    conv_in_layers: Size of each input node feature vector.
    conv_out_layers: Output size of both GAT convolutions. Must be even because
      the hidden linear layer has `conv_out_layers // 2` units.
    num_classes: Number of scores produced per graph.
  """

  def __init__(self,
               conv_in_layers,
               conv_out_layers=64,
               num_classes=2):
    super().__init__()

    assert conv_out_layers % 2 == 0, 'Convolution output should have even number of layers'
    fc_out_layers = conv_out_layers // 2

    self.conv1 = GATConv(conv_in_layers, conv_out_layers)
    self.conv2 = GATConv(conv_out_layers, conv_out_layers)
    self.fc1 = Linear(conv_out_layers, fc_out_layers)
    self.fc2 = Linear(fc_out_layers, num_classes)

    self.activation = nn.SELU(inplace=True)

  def forward(self, data):
    """Compute raw class scores for a batch of graphs.

    Args:
      data: Batched graph object providing `x`, `edge_index`, `batch`,
        `num_node_features` and `num_nodes`.

    Returns:
      The output of the last linear layer, with `num_classes` scores per pooled
      graph. No softmax is applied.
    """
    x, edge_index, batch = data.x, data.edge_index, data.batch
    if data.num_node_features == 0:
      # Featureless graphs: give every node a constant scalar feature. Allocate it on
      # the device that already holds the graph connectivity rather than assuming CUDA,
      # so the model also works on CPU or on a non-default GPU.
      x = torch.ones(data.num_nodes, 1, device=edge_index.device)

    x = self.conv1(x, edge_index)
    x = self.activation(x)
    x = self.conv2(x, edge_index)
    x = self.activation(x)
    # Collapse node embeddings into one vector per graph.
    x = global_mean_pool(x, batch)
    x = self.activation(x)
    x = self.fc1(x)
    x = self.activation(x)
    x = self.fc2(x)
    return x

def test(model, loader):
  """Evaluate `model` on every batch of `loader` and return classification metrics.

  Predictions and labels are collected over the whole loader and each metric is
  computed once on the full set. Averaging per-batch metrics would give a smaller
  final batch the same weight as a full one and distort F1/precision/recall.

  Args:
    model: Network mapping a batched graph object to per-graph class scores.
    loader: Iterable of batches exposing integer labels in `.y` and a `.to(device)` method.

  Returns:
    Tuple `(acc, f1, precision, recall)`: accuracy, macro-averaged F1, and
    precision / recall with scikit-learn's default (binary) averaging.

  Raises:
    ValueError: If `loader` yields no batches.
  """
  model.eval()

  # Evaluate on whatever device the model lives on instead of assuming CUDA.
  first_param = next(model.parameters(), None)
  device = first_param.device if first_param is not None else torch.device('cpu')

  labels, predictions = [], []
  # No gradients are needed for evaluation; skipping autograd saves memory and time.
  with torch.no_grad():
    for data in loader:
      labels.append(data.y.cpu())
      scores = model(data.to(device))
      # argmax on the raw scores: softmax is monotonic, so it would not change the winner.
      predictions.append(scores.argmax(dim=1).cpu())

  if not labels:
    raise ValueError('loader yielded no batches to evaluate')

  y = torch.cat(labels).numpy()
  y_pred = torch.cat(predictions).numpy()

  acc = accuracy_score(y, y_pred)
  f1 = f1_score(y, y_pred, average='macro')
  precision = precision_score(y, y_pred, zero_division=0)
  recall = recall_score(y, y_pred, zero_division=0)
  return acc, f1, precision, recall

In [None]:
# Graph-classification benchmark from the TU collection. Alternative datasets that can be
# swapped in through `name`: 'SYNTHETIC', 'PROTEINS'.
dataset = TUDataset(root='data/', name='AIDS', use_node_attr=True)

# 80% of the graphs go to training, the remainder to testing.
num_training = int(0.8 * len(dataset))
num_test = len(dataset) - num_training

# A dedicated, seeded generator keeps the random split identical across runs.
split_rng = torch.Generator()
split_rng.manual_seed(SEED)
training_set, test_set = random_split(
    dataset,
    [num_training, num_test],
    generator=split_rng,
)

# Only the training batches are reshuffled; the test order stays fixed.
train_loader = DataLoader(training_set, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_set, shuffle=False, batch_size=BATCH_SIZE)

In [None]:
# Pick the compute device once: the GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# max(..., 1): a dataset without node features still gets a 1-dimensional input,
# matching the constant feature GeometricNet.forward substitutes in that case.
model = GeometricNet(max(dataset.num_node_features, 1),
                     num_classes=dataset.num_classes).to(device)
optimizer = Adam(model.parameters(), lr=LR)
# Multi-class margin (hinge) loss on the raw class scores; nn.CrossEntropyLoss() is a
# drop-in alternative for the same inputs.
loss_fn = nn.MultiMarginLoss()

for epoch in range(NUM_EPOCHS):
  model.train()
  running_loss = 0.
  for data in train_loader:
    data = data.to(device)
    pred = model(data)
    loss = loss_fn(pred, data.y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    running_loss += loss.item()

  # Report every 10th epoch and once more after the last epoch, so the metrics of the
  # final model are always shown.
  if epoch % 10 == 0 or epoch == NUM_EPOCHS - 1:
    # Mean loss over the epoch's mini-batches rather than only the last (possibly
    # partial) batch; max(..., 1) guards against an empty training loader.
    epoch_loss = running_loss / max(len(train_loader), 1)
    acc, f1, precision, recall = test(model, test_loader)
    print("Epoch {}. Loss: {:.4f}. Test accuracy: {:.4f}, F1: {:.4f}, Precision: {:.4f}, Recall: {:.4f}".format(
        epoch, epoch_loss, acc, f1, precision, recall))