# Import Required Modules

In [2]:
from torch_geometric.data import DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.nn import GraphSAGE
from sklearn.metrics import accuracy_score
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures

# Fetch Data

In [3]:
cora = Planetoid(root='/tmp/Cora', name='Cora', transform=NormalizeFeatures())

# Define Model

In [4]:
class GraphSAGEModel(nn.Module):
    
    def __init__(self, num_features, hidden_size, num_classes, dropout_rate):
        super(GraphSAGEModel, self).__init__()
        self.conv1 = GraphSAGE(num_features, hidden_size, num_layers=1)
        self.conv2 = GraphSAGE(hidden_size, num_classes, num_layers=1)
        self.dropout = nn.Dropout(p=dropout_rate)
    
    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, edge_index)
        x = self.dropout(x)
        x = torch.softmax(x, dim=1)
        return x

# Train and Evaluate The Model

In [5]:
def evaluate_model(model, data, test_mask):
    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index)
        predicted_labels = logits.argmax(dim=1)

    accuracy = accuracy_score(data.y[test_mask].numpy(), predicted_labels[test_mask].numpy())
    return accuracy

In [6]:
def train_model(model, data, train_mask, val_mask, test_mask, patience=10, max_epochs=200, batch_size=64):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    best_val_loss = float('inf')
    best_model = None
    consecutive_no_improvement = 0
        
    loader = DataLoader([data], batch_size=batch_size, shuffle=True)

    for epoch in range(max_epochs):
        model.train()
        total_loss = 0
        total_val_loss = 0
        for batch in loader:
            optimizer.zero_grad()
            out = model(batch.x, batch.edge_index)
            loss = criterion(out[train_mask], batch.y[train_mask])
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
            with torch.no_grad():
                val_loss = criterion(out[val_mask], batch.y[val_mask])
                total_val_loss += val_loss.item()
            
        average_val_loss = total_val_loss / len(loader)
        
        if average_val_loss < best_val_loss:
            best_val_loss = average_val_loss
            best_model = model
            consecutive_no_improvement = 0
        else:
            consecutive_no_improvement += 1
        
        if consecutive_no_improvement == patience:
            print(f'Early stopping at epoch {epoch + 1} due to no improvement in validation loss.')
            break
        
    test_accuracy = evaluate_model(best_model, data, test_mask)
    print(f'Test Accuracy: {test_accuracy:.4f}')
        
    return best_model

In [7]:
cora_data = cora[0]
num_nodes = cora_data.num_nodes
train_mask = cora_data.train_mask
val_mask = cora_data.val_mask
test_mask = cora_data.test_mask

In [8]:
graph_sage_model = GraphSAGEModel(num_features=cora.num_features,
                                  hidden_size=32,
                                  num_classes=cora.num_classes,
                                  dropout_rate=0.2)

trained_graphsage_model = train_model(graph_sage_model, cora_data, train_mask, val_mask, test_mask)



Early stopping at epoch 58 due to no improvement in validation loss.
Test Accuracy: 0.7720
