In [16]:
import numpy as np

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.preprocessing import StandardScaler

import torch
from torch_geometric.datasets import CitationFull
from torch_geometric.nn import GCNConv
import torch.nn.functional as F

In [8]:
# Choose the compute device up front, then load the graph and place it there.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset files are stored under `root`.
dataset = CitationFull(root='/tmp/Cora', name='Cora')
data = dataset[0].to(device)  # first graph object in the dataset

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz
Processing...
Done!


In [9]:
# ----------------------------------
# 🌲 Gradient Boosting Classifier
# ----------------------------------

# Node features and labels as NumPy arrays for scikit-learn.
x = data.x.cpu().numpy()
y = data.y.cpu().numpy()

# Manual train/test split (70/30) over a seeded random permutation of the nodes
np.random.seed(42)
num_nodes = x.shape[0]
indices = np.random.permutation(num_nodes)
split = int(0.7 * num_nodes)
train_idx, test_idx = indices[:split], indices[split:]

# Fit the scaler on the training rows only so that test-set statistics do not
# leak into preprocessing, then apply that same transform to every node.
scaler = StandardScaler()
scaler.fit(x[train_idx])
x_scaled = scaler.transform(x)

In [10]:
# Slice the scaled features/labels by the split indices, train on the training
# part, and report accuracy of the hard predictions on the test part.
X_train, y_train = x_scaled[train_idx], y[train_idx]
X_test, y_test = x_scaled[test_idx], y[test_idx]

gb = GradientBoostingClassifier(n_estimators=100).fit(X_train, y_train)
y_pred = gb.predict(X_test)
gb_acc = accuracy_score(y_test, y_pred)
print(f"🌲 Gradient Boosting Accuracy: {gb_acc:.4f}")

🌲 Gradient Boosting Accuracy: 0.5428


In [None]:
# Weighted-average F1 of the gradient-boosting predictions on the test nodes
f1_score(y_true=y[test_idx], y_pred=y_pred, average='weighted')

0.541897049759113

In [17]:
# Weighted-average precision of the gradient-boosting predictions on the test nodes
precision_score(y_true=y[test_idx], y_pred=y_pred, average='weighted')

0.5543566689413483

In [18]:
# Weighted-average recall of the gradient-boosting predictions on the test nodes
recall_score(y_true=y[test_idx], y_pred=y_pred, average='weighted')

0.5427753452340855

In [20]:
# One-vs-rest, weighted ROC AUC computed from the gradient-boosting class
# probabilities on the test nodes
gb_test_proba = gb.predict_proba(x_scaled[test_idx])
roc_auc_score(y[test_idx], gb_test_proba, average='weighted', multi_class='ovr')

np.float64(0.9037628278450435)

In [11]:
# ----------------------------------
# 🧠 Graph Convolutional Network
# ----------------------------------

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network that returns per-node logits.

    Args:
        in_channels: Input size passed to the first ``GCNConv`` layer.
        hidden_channels: Output size of the first layer / input of the second.
        out_channels: Output size of the second ``GCNConv`` layer.
        dropout: Dropout probability applied to the hidden activations while
            the module is in training mode. Defaults to 0.5, the value that
            was previously hard-coded.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, dropout=0.5):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)
        self.dropout = dropout

    def forward(self, x, edge_index):
        """Run both convolutions over ``x`` using ``edge_index``.

        Returns the raw (un-normalised) output of the second layer; no softmax
        is applied here.
        """
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is a no-op when self.training is False (i.e. after .eval()).
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        return x

In [12]:
# Seed PyTorch before building the model so weight initialisation and dropout
# masks repeat when this cell is re-run (same seed value as the NumPy split).
torch.manual_seed(42)

model = GCN(data.num_node_features, 64, dataset.num_classes).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Create boolean node masks from the NumPy split indices, then move them to the
# same device as the graph so mask indexing never mixes CPU and GPU tensors.
train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
train_mask[train_idx] = True
train_mask = train_mask.to(device)
test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
test_mask[test_idx] = True
test_mask = test_mask.to(device)

# Training loop: full-graph forward pass, loss computed on training nodes only
model.train()
for epoch in range(200):
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.cross_entropy(out[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()
    if epoch % 20 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 4.2471
Epoch 20, Loss: 1.2284
Epoch 40, Loss: 0.9122
Epoch 60, Loss: 0.7915
Epoch 80, Loss: 0.7258
Epoch 100, Loss: 0.6929
Epoch 120, Loss: 0.6552
Epoch 140, Loss: 0.6353
Epoch 160, Loss: 0.6218
Epoch 180, Loss: 0.6120


In [13]:
# Evaluation
model.eval()  # switches dropout off
# Inference only: disable autograd so `out` does not carry a computation graph.
with torch.no_grad():
    out = model(data.x, data.edge_index)
pred = out.argmax(dim=1)  # predicted class per node
correct = pred[test_mask] == data.y[test_mask]
gcn_acc = int(correct.sum()) / int(test_mask.sum())
print(f"🧠 GCN Accuracy: {gcn_acc:.4f}")


🧠 GCN Accuracy: 0.7216


In [33]:
# Ground-truth labels of the test nodes as a CPU NumPy array, so the
# scikit-learn metrics below also work when `data` lives on a GPU.
l = data.y[test_mask].cpu().numpy()

In [55]:
# Softmax probability that the model assigns to its own predicted class,
# one value per test node.
test_probs = F.softmax(out, dim=1)[test_mask]
scores = test_probs.gather(1, pred[test_mask].unsqueeze(1)).squeeze(1)

In [35]:
# Weighted-average F1 of the GCN on the test nodes; predictions are moved to
# CPU/NumPy first because scikit-learn cannot read GPU tensors.
f1_score(l, pred[test_mask].cpu().numpy(), average='weighted')

0.7179227234444894

In [51]:
# GCN test accuracy via scikit-learn; predictions are moved to CPU/NumPy first
# because scikit-learn cannot read GPU tensors.
accuracy_score(l, pred[test_mask].cpu().numpy())

0.7216234422364433

In [52]:
# Weighted-average recall of the GCN on the test nodes; predictions are moved
# to CPU/NumPy first because scikit-learn cannot read GPU tensors.
recall_score(l, pred[test_mask].cpu().numpy(), average='weighted')

0.7216234422364433

In [53]:
# Weighted-average precision of the GCN on the test nodes. Some classes are
# never predicted, which made the default setting emit a warning; passing
# zero_division=0 scores those classes as 0 explicitly and silently.
# Predictions are moved to CPU/NumPy because scikit-learn cannot read GPU tensors.
precision_score(l, pred[test_mask].cpu().numpy(), average='weighted', zero_division=0)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0.7233653335468685

In [60]:
# One-vs-rest, weighted ROC AUC from the GCN's softmax probabilities on the
# test nodes. `.cpu()` is required before `.numpy()` when the tensor is on a GPU.
gcn_test_proba = F.softmax(out, dim=1)[test_mask].detach().cpu().numpy()
roc_auc_score(l, gcn_test_proba, average='weighted', multi_class='ovr')

np.float64(0.9852935307159091)