In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

!pip install torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.data import DataLoader
from torch_geometric.nn import MessagePassing

# Define the Graph Attention Layer
class GraphAttentionLayer(MessagePassing):
    def __init__(self, in_features, out_features, dropout=0.5):
        super(GraphAttentionLayer, self).__init__(aggr='add')  # Use 'add' aggregation for GAT
        self.lin = nn.Linear(in_features, out_features)
        self.att = nn.Parameter(torch.Tensor(1, out_features * 2))
        self.dropout = dropout

        self.reset_parameters()

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.lin.weight.data)
        nn.init.xavier_uniform_(self.att.data)

    def forward(self, x, edge_index):
        x = self.lin(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.propagate(edge_index, x=x)

    def message(self, x_i, x_j, edge_index):
        alpha = (torch.cat([x_i, x_j], dim=-1) * self.att).sum(dim=-1)
        alpha = F.leaky_relu(alpha, negative_slope=0.2)
        alpha = torch.exp(alpha)
        return x_j * alpha.view(-1, 1)

    def update(self, aggr_out):
        return aggr_out


# Define the GAT model
class GAT(nn.Module):
    def __init__(self, in_features, hidden_dim, out_features, num_heads, dropout=0.5):
        super(GAT, self).__init__()

        self.attention_heads = nn.ModuleList()
        for _ in range(num_heads):
            self.attention_heads.append(GraphAttentionLayer(in_features, hidden_dim, dropout=dropout))

        self.out_projection = nn.Linear(hidden_dim * num_heads, out_features)

    def forward(self, x, edge_index):
        head_outputs = [attention_head(x, edge_index) for attention_head in self.attention_heads]
        x = torch.cat(head_outputs, dim=-1)
        x = self.out_projection(x)
        return F.log_softmax(x, dim=1)


# Load and preprocess the Cora dataset
dataset = Planetoid(root='data/Cora', name='Cora')
data = dataset[0]

# Create DataLoader
loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Initialize the model
model = GAT(in_features=data.num_features, hidden_dim=8, out_features=dataset.num_classes, num_heads=8, dropout=0.6)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Define loss criterion
criterion = nn.CrossEntropyLoss()

# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Training loop
model.train()
for epoch in range(200):
    total_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        output = model(batch.x, batch.edge_index)
        loss = criterion(output[batch.train_mask], batch.y[batch.train_mask])
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch.num_graphs

    if epoch % 10 == 0:
        print(f'Epoch: {epoch}, Loss: {total_loss / len(dataset)}')

# Evaluation
model.eval()
_, pred = model(data.x.to(device), data.edge_index.to(device)).max(dim=1)
correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()
accuracy = correct / data.test_mask.sum().item()
print(f'Test Accuracy: {accuracy}')

Epoch: 0, Loss: 2.0744190216064453
Epoch: 10, Loss: 0.0713529959321022
Epoch: 20, Loss: 2.309246778488159
Epoch: 30, Loss: 3.3129541873931885
Epoch: 40, Loss: 5.489168643951416
Epoch: 50, Loss: 2.7130422592163086
Epoch: 60, Loss: 56.60102081298828
Epoch: 70, Loss: 0.42846646904945374
Epoch: 80, Loss: 0.049449026584625244
Epoch: 90, Loss: 29.511062622070312
Epoch: 100, Loss: 0.22187849879264832
Epoch: 110, Loss: 0.2849466800689697
Epoch: 120, Loss: 10.60549545288086
Epoch: 130, Loss: 0.0865437388420105
Epoch: 140, Loss: 0.08579030632972717
Epoch: 150, Loss: 0.00396055867895484
Epoch: 160, Loss: 0.05577962473034859
Epoch: 170, Loss: 0.19894298911094666
Epoch: 180, Loss: 0.020448096096515656
Epoch: 190, Loss: 0.04405169188976288
Test Accuracy: 0.706
