In [None]:
!pip -q install torch torch_geometric

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import os, random, numpy as np, torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv
import torch_geometric.transforms as T

In [None]:
# --------------------------
# 0) Configuration and reproducibility
# --------------------------
SEED = 42

# Seed every random number generator used by the notebook, in a fixed order:
# Python's `random`, NumPy, PyTorch (CPU) and PyTorch (all CUDA devices).
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(SEED)

# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Device:", device)

# Hyperparameters
HIDDEN_DIM = 16      # width of the hidden GCN layer
DROPOUT    = 0.5     # dropout probability passed to the model
LR         = 0.01    # Adam learning rate
WEIGHT_DEC = 5e-4    # Adam weight decay
EPOCHS     = 200     # number of training epochs

Device: cpu


In [None]:
# --------------------------
# 1) Dataset: Cora (Planetoid)
#    Ships with train/val/test masks
#    Node features are normalized through the NormalizeFeatures transform
# --------------------------
root = "data/Planetoid"
feature_normalizer = T.NormalizeFeatures()
dataset = Planetoid(root=root, name="Cora", transform=feature_normalizer)

# The dataset holds a single graph with x, edge_index, y, train_mask, val_mask, test_mask.
cora_graph = dataset[0]
data = cora_graph.to(device)

# Summary of the graph and of the size of each split.
print(f"#Nodos: {data.num_nodes}  | #Aristas: {data.num_edges}  | #Features: {dataset.num_features}  | #Clases: {dataset.num_classes}")
print(f"Máscaras -> train: {int(data.train_mask.sum())}, val: {int(data.val_mask.sum())}, test: {int(data.test_mask.sum())}")

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...


#Nodos: 2708  | #Aristas: 10556  | #Features: 1433  | #Clases: 7
Máscaras -> train: 140, val: 500, test: 1000


Done!


In [None]:
# --------------------------
# 2) GCN model (2 layers)
# --------------------------
class GCN(nn.Module):
    """Two stacked GCNConv layers with ReLU and dropout in between.

    Args:
        in_dim: Input size of the first convolution.
        hidden_dim: Output size of the first convolution and input size of the second.
        out_dim: Output size of the second convolution (the notebook passes the
            number of classes).
        dropout: Dropout probability applied to the hidden activations.
    """

    def __init__(self, in_dim, hidden_dim, out_dim, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(in_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, out_dim)

    def forward(self, x, edge_index):
        """Return the raw output of the second convolution (no softmax applied).

        Args:
            x: Node features fed to the first convolution.
            edge_index: Graph connectivity, passed unchanged to both convolutions.
        """
        # First convolution (message passing over the graph) followed by ReLU.
        hidden = F.relu(self.conv1(x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

# Build the network on the selected device, then the optimizer and the loss.
model = GCN(
    in_dim=dataset.num_features,
    hidden_dim=HIDDEN_DIM,
    out_dim=dataset.num_classes,
    dropout=DROPOUT,
)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR, weight_decay=WEIGHT_DEC)
criterion = nn.CrossEntropyLoss()

In [None]:
# --------------------------
# 3) Accuracy helper restricted to a node mask
# --------------------------
@torch.no_grad()
def masked_accuracy(logits, y, mask):
    """Return the accuracy, in percent, over the nodes selected by ``mask``.

    Args:
        logits: Per-node scores; the predicted class is the argmax over dim 1.
        y: Ground-truth label for every node.
        mask: Selector used to index both the predictions and ``y``. May be a
            boolean mask or a tensor of node indices.

    Returns:
        A Python float in [0, 100]. Returns 0.0 when ``mask`` selects no node,
        instead of raising ``ZeroDivisionError``.
    """
    selected_targets = y[mask]
    # Count the selected entries directly: `mask.sum()` is only the selection
    # size for boolean masks, and would be wrong for an index tensor.
    total = selected_targets.numel()
    if total == 0:
        return 0.0
    preds = logits.argmax(dim=1)
    correct = (preds[mask] == selected_targets).sum().item()
    return 100.0 * correct / total

In [None]:
# --------------------------
# 4) Training
# --------------------------
# Model selection: remember the weights (and the matching epoch / test accuracy)
# of the epoch with the highest validation accuracy seen so far.
best_val_acc, best_state = 0.0, None
best_epoch, best_test_acc = 0, 0.0
for epoch in range(1, EPOCHS + 1):
    # One full-graph optimization step; the loss only uses the training nodes.
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)          # [N, C]
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()

    # Evaluation: a second forward pass in eval mode (dropout disabled),
    # performed after this epoch's weight update.
    model.eval()
    with torch.no_grad():
        logits = model(data.x, data.edge_index)
        train_acc = masked_accuracy(logits, data.y, data.train_mask)
        val_acc   = masked_accuracy(logits, data.y, data.val_mask)
        test_acc  = masked_accuracy(logits, data.y, data.test_mask)

    # Strict '>' keeps the earliest epoch when several epochs tie.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch, best_test_acc = epoch, test_acc
        # Detached CPU copies so later optimizer steps cannot modify the snapshot.
        best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    # Note: `loss` is the training-mode loss computed before this epoch's step.
    if epoch % 20 == 0 or epoch == 1:
        print(f"[{epoch:03d}] loss={loss.item():.4f} | train={train_acc:.2f}% | val={val_acc:.2f}% | test={test_acc:.2f}%")

# Restore the checkpoint selected on the validation set, so the model left in
# memory is the selected one rather than the last-epoch one, and report the
# test accuracy measured at that same epoch.
if best_state is not None:
    model.load_state_dict(best_state)
    model.eval()
    print(f"[best] epoch={best_epoch:03d} | val={best_val_acc:.2f}% | test={best_test_acc:.2f}%")


[001] loss=1.9459 | train=32.14% | val=14.40% | test=15.70%
[020] loss=1.7174 | train=85.71% | val=52.60% | test=53.60%
[040] loss=1.3281 | train=96.43% | val=70.40% | test=70.30%
[060] loss=0.9540 | train=99.29% | val=76.00% | test=77.20%
[080] loss=0.6925 | train=99.29% | val=77.60% | test=79.00%
[100] loss=0.5591 | train=99.29% | val=76.60% | test=79.50%
[120] loss=0.4700 | train=99.29% | val=77.40% | test=80.20%
[140] loss=0.4071 | train=99.29% | val=77.80% | test=80.10%
[160] loss=0.3849 | train=100.00% | val=77.80% | test=80.10%
[180] loss=0.3435 | train=100.00% | val=77.80% | test=80.20%
[200] loss=0.3223 | train=100.00% | val=78.00% | test=79.90%


Ejercicios:
1) Agregar early stopping.
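2) Comparar GCNConv con otras capas de PyTorch Geometric: SAGEConv (GraphSAGE), GATConv (GAT) y una capa propia implementada sobre la clase base MessagePassing.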
3) Comparar la performance en distintos datasets: Cora, CiteSeer, PubMed, etc.