In [13]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GATConv
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE


In [14]:
# Seed PyTorch's random number generator before any weights are initialised
# or dropout masks are sampled, so repeated runs start from the same state.
SEED = 42
torch.manual_seed(SEED)

# Compute device used by the data and the model. Defined here so the notebook
# runs top-to-bottom on a fresh kernel; falls back to CPU when CUDA is absent.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the Cora dataset through Planetoid (stored under ../data) and move its
# first graph object to the selected device.
dataset = Planetoid(root='../data', name='Cora')
data = dataset[0].to(device)

Processing...
  out = pickle.load(f, encoding='latin1')
  out = pickle.load(f, encoding='latin1')
  out = pickle.load(f, encoding='latin1')
  out = pickle.load(f, encoding='latin1')
  out = pickle.load(f, encoding='latin1')
  out = pickle.load(f, encoding='latin1')
Done!


In [16]:
class GAT(torch.nn.Module):
    """Two-layer graph attention network producing per-node log-probabilities.

    Args:
        in_channels: Size of each input node feature vector.
        hidden_channels: Feature size produced by each first-layer head.
        out_channels: Size of the per-node output (one entry per class).
        heads: Number of attention heads in the first layer.
        dropout: Dropout probability applied to the layer inputs in
            ``forward`` and also handed to both ``GATConv`` layers.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, heads=8, dropout=0.6):
        super().__init__()
        self.dropout = dropout

        # First layer: `heads` attention heads of `hidden_channels` features
        # each; the head outputs are concatenated, giving
        # `hidden_channels * heads` features per node.
        self.conv1 = GATConv(in_channels, hidden_channels, heads=heads, dropout=dropout)

        # Second layer: a single, non-concatenated head that maps the
        # concatenated first-layer features onto `out_channels` class scores.
        self.conv2 = GATConv(
            hidden_channels * heads,
            out_channels,
            heads=1,
            concat=False,
            dropout=dropout,
        )

    def _first_layer(self, x, edge_index):
        """Apply the first attention layer followed by an ELU activation."""
        return F.elu(self.conv1(x, edge_index))

    def forward(self, x, edge_index):
        """Return log-softmax class scores (over dim 1) for every node.

        Dropout is applied to the raw input and again to the first-layer
        activations; both are active only while the module is in training mode.
        """
        hidden = F.dropout(x, p=self.dropout, training=self.training)
        hidden = self._first_layer(hidden, edge_index)
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        logits = self.conv2(hidden, edge_index)
        return F.log_softmax(logits, dim=1)

    def get_embeddings(self, x, edge_index):
        """Extract layer-1 embeddings for t-SNE visualization later.

        Unlike ``forward``, no explicit input dropout is applied here.
        """
        return self._first_layer(x, edge_index)


# Build the network: input width and class count come from the dataset,
# with 8 hidden features per head, 8 heads and dropout 0.6.
model = GAT(
    in_channels=dataset.num_features,
    hidden_channels=8,
    out_channels=dataset.num_classes,
    heads=8,
    dropout=0.6,
)
# Move the parameters to the same device as the graph data.
model = model.to(device)

In [None]:
# Adam over every model parameter, with weight decay as regularisation.
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0.005,
    weight_decay=5e-4,
)

# Per-epoch history (loss, train accuracy, validation accuracy).
train_losses, train_accs, val_accs = [], [], []


def train():
    """Run one full-graph optimisation step and report training metrics.

    Uses the module-level ``model``, ``optimizer`` and ``data``.

    Returns:
        tuple: ``(loss, acc)`` where ``loss`` is the NLL loss on the training
        nodes (Python float) and ``acc`` is the fraction of training nodes
        classified correctly by the same training-mode forward pass.
    """
    model.train()
    optimizer.zero_grad()

    log_probs = model(data.x, data.edge_index)

    # Restrict predictions and labels to the training nodes once and reuse.
    mask = data.train_mask
    train_log_probs = log_probs[mask]
    targets = data.y[mask]

    loss = F.nll_loss(train_log_probs, targets)
    loss.backward()
    optimizer.step()

    n_correct = (train_log_probs.argmax(dim=1) == targets).sum().item()
    n_train = mask.sum().item()
    return loss.item(), n_correct / n_train


@torch.no_grad()
def evaluate(mask):
    """Return the classification accuracy on the nodes selected by ``mask``.

    Puts the module-level ``model`` into eval mode, runs it on the full graph
    in ``data`` without gradient tracking, and compares the arg-max
    predictions with ``data.y`` on the masked nodes.

    Args:
        mask: Boolean node mask (e.g. ``data.val_mask``) choosing which nodes
            are scored.

    Returns:
        float: Number of correct predictions divided by the number of
        selected nodes.
    """
    model.eval()
    predictions = model(data.x, data.edge_index)[mask].argmax(dim=1)
    n_correct = (predictions == data.y[mask]).sum().item()
    n_selected = mask.sum().item()
    return n_correct / n_selected


# Train for 200 epochs while recording per-epoch metrics. The weights from the
# epoch with the highest validation accuracy are snapshotted and restored at
# the end, so later cells use the best-validating model rather than whatever
# the final epoch happened to produce.
best_val_acc = 0
best_epoch = 0
best_state = None
for epoch in range(1, 201):
    loss, train_acc = train()
    val_acc = evaluate(data.val_mask)

    train_losses.append(loss)
    train_accs.append(train_acc)
    val_accs.append(val_acc)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch
        # Clone every tensor: state_dict() hands back references to the live
        # parameters, which subsequent optimiser steps would overwrite.
        best_state = {
            name: tensor.detach().clone()
            for name, tensor in model.state_dict().items()
        }

    if epoch % 20 == 0:  # Print every 20 epochs
        print(f"Epoch {epoch:03d} | Loss: {loss:.4f} | Train Acc: {train_acc:.4f} | Val Acc: {val_acc:.4f}")

# Roll the model back to the best-validating weights (skipped only if
# validation accuracy never rose above zero).
if best_state is not None:
    model.load_state_dict(best_state)

print(f"\nBest Validation Accuracy: {best_val_acc:.4f}")
print(f"Restored model weights from epoch {best_epoch:03d}")


Epoch 020 | Loss: 0.7988 | Train Acc: 0.7929 | Val Acc: 0.8120
Epoch 040 | Loss: 0.7258 | Train Acc: 0.7929 | Val Acc: 0.7980
Epoch 060 | Loss: 0.5277 | Train Acc: 0.8286 | Val Acc: 0.7680
Epoch 080 | Loss: 0.4271 | Train Acc: 0.8643 | Val Acc: 0.7720
Epoch 100 | Loss: 0.4322 | Train Acc: 0.8571 | Val Acc: 0.7560
Epoch 120 | Loss: 0.4012 | Train Acc: 0.8643 | Val Acc: 0.7660
Epoch 140 | Loss: 0.4183 | Train Acc: 0.8357 | Val Acc: 0.7680
Epoch 160 | Loss: 0.4253 | Train Acc: 0.8429 | Val Acc: 0.7700
Epoch 180 | Loss: 0.3802 | Train Acc: 0.8571 | Val Acc: 0.7720
Epoch 200 | Loss: 0.4172 | Train Acc: 0.8571 | Val Acc: 0.7660

Best Validation Accuracy: 0.8120
