In [1]:
# -----------------------------
# 0. Carregar Dependências
# -----------------------------
import os
import json
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import train_test_split_edges, negative_sampling
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score

# -----------------------------
# 1. Carregar Dataset
# -----------------------------
# Ensure the download directory exists before Planetoid writes into it.
os.makedirs("../datasets", exist_ok=True)
dataset = Planetoid(name="Cora", root="../datasets/Cora")

# Take the first graph of the dataset and split its edges into the
# train/val/test sets consumed by the training and evaluation code below.
# NOTE(review): recent PyG releases mark train_test_split_edges as
# deprecated in favour of transforms.RandomLinkSplit -- confirm against
# the installed version before upgrading.
data = train_test_split_edges(dataset[0])

# -----------------------------
# 2. Definir Modelo GCN
# -----------------------------
class GCN(torch.nn.Module):
    """Two-layer graph convolutional encoder.

    The forward pass is ``GCNConv -> ReLU -> GCNConv``; the output of the
    second convolution is returned without a final activation.

    Args:
        in_channels: Input width given to the first ``GCNConv``.
        hidden_channels: Output width of the first layer and input width
            of the second.
        out_channels: Output width of the second ``GCNConv``.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Attribute names determine the state_dict keys, so they are kept
        # as conv1 / conv2.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Encode node features ``x`` over the graph given by ``edge_index``."""
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# -----------------------------
# 3. Link Predictor
# -----------------------------
class LinkPredictor(torch.nn.Module):
    """Small MLP that scores a pair of node embeddings.

    The two inputs are concatenated along the last dimension and passed
    through ``Linear -> ReLU -> Linear -> sigmoid``; the result is
    flattened to one dimension.

    Args:
        in_channels: Width of a single embedding (the first linear layer
            receives twice this width because the pair is concatenated).
        hidden_channels: Width of the hidden layer.
        out_channels: Width of the output layer (1 by default).
    """

    def __init__(self, in_channels, hidden_channels, out_channels=1):
        super().__init__()
        pair_width = 2 * in_channels
        # Names lin1 / lin2 are kept because they define the state_dict keys.
        self.lin1 = torch.nn.Linear(pair_width, hidden_channels)
        self.lin2 = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, x_i, x_j):
        """Return sigmoid scores in a flat tensor for the pairs ``(x_i, x_j)``."""
        pair = torch.cat((x_i, x_j), dim=-1)
        hidden = self.lin1(pair).relu()
        scores = self.lin2(hidden).sigmoid()
        return scores.view(-1)

# -----------------------------
# 4. Funções de treino e teste
# -----------------------------
def train(gcn, predictor, optimizer, data):
    """Run one full-batch optimisation step for link prediction.

    Node embeddings are computed from the training edges, the predictor
    scores the positive training edges plus freshly sampled negative
    pairs, and both modules are updated through ``optimizer`` using binary
    cross-entropy.

    Args:
        gcn: Encoder called as ``gcn(data.x, edge_index)``.
        predictor: Pair scorer called as ``predictor(z_src, z_dst)``.
        optimizer: Optimizer that is zeroed and stepped once.
        data: Object providing ``x``, ``train_pos_edge_index`` and
            ``num_nodes``.

    Returns:
        The loss tensor produced by ``F.binary_cross_entropy`` for this step.
    """
    for module in (gcn, predictor):
        module.train()

    pos_edges = data.train_pos_edge_index
    embeddings = gcn(data.x, pos_edges)

    # Request as many negative pairs as there are positive training edges.
    neg_edges = negative_sampling(
        edge_index=pos_edges,
        num_nodes=data.num_nodes,
        num_neg_samples=pos_edges.size(1),
    )

    def score(edges):
        # Row 0 holds one endpoint of each edge, row 1 the other.
        return predictor(embeddings[edges[0]], embeddings[edges[1]])

    pos_scores = score(pos_edges)
    neg_scores = score(neg_edges)
    scores = torch.cat((pos_scores, neg_scores), dim=0)

    # Targets: 1 for each positive score, 0 for each negative score, created
    # with the same dtype and device as the scores.
    targets = torch.cat(
        (
            scores.new_ones(pos_scores.size(0)),
            scores.new_zeros(neg_scores.size(0)),
        ),
        dim=0,
    )

    loss = F.binary_cross_entropy(scores, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    return loss

@torch.no_grad()
def test(gcn, predictor, data):
    """Compute ROC AUC on the validation and test edge splits.

    Both modules are switched to eval mode. Embeddings are computed once
    from the training edges and reused for both splits.

    Args:
        gcn: Encoder called as ``gcn(data.x, edge_index)``.
        predictor: Pair scorer called as ``predictor(z_src, z_dst)``.
        data: Object providing ``x``, ``train_pos_edge_index`` and the
            ``val_*`` / ``test_*`` positive and negative edge indices.

    Returns:
        Tuple ``(auc_val, auc_test)``.
    """
    for module in (gcn, predictor):
        module.eval()

    embeddings = gcn(data.x, data.train_pos_edge_index)

    def split_auc(pos_edges, neg_edges):
        # Score the positive pairs first, then the negative pairs, so the
        # target vector below lines up with the concatenated scores.
        pos_scores = predictor(embeddings[pos_edges[0]], embeddings[pos_edges[1]])
        neg_scores = predictor(embeddings[neg_edges[0]], embeddings[neg_edges[1]])
        scores = torch.cat((pos_scores, neg_scores), dim=0)

        targets = torch.cat(
            (
                scores.new_ones(pos_scores.size(0)),
                scores.new_zeros(neg_scores.size(0)),
            ),
            dim=0,
        )

        # Tensors are moved to the CPU before being handed to scikit-learn.
        return roc_auc_score(targets.cpu(), scores.cpu())

    val_score = split_auc(data.val_pos_edge_index, data.val_neg_edge_index)
    test_score = split_auc(data.test_pos_edge_index, data.test_neg_edge_index)

    return val_score, test_score

# -----------------------------
# 5. Treinar modelo
# -----------------------------
# Use CUDA when torch reports it as available; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Encoder maps num_features -> 64 -> 32; the predictor takes pairs of
# 32-wide embeddings and uses a 64-wide hidden layer.
gcn = GCN(
    in_channels=dataset.num_features,
    hidden_channels=64,
    out_channels=32,
).to(device)
predictor = LinkPredictor(in_channels=32, hidden_channels=64).to(device)
data = data.to(device)

# A single Adam optimizer updates the parameters of both modules.
optimizer = torch.optim.Adam(
    [*gcn.parameters(), *predictor.parameters()],
    lr=0.01,
)

# 100 epochs; validation/test AUC is reported on every 10th epoch.
for epoch in range(1, 101):
    loss = train(gcn, predictor, optimizer, data)
    if epoch % 10:
        continue
    val_auc, test_auc = test(gcn, predictor, data)
    print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, Val AUC: {val_auc:.4f}, Test AUC: {test_auc:.4f}")

# -----------------------------
# 6. Salvar Modelo 
# -----------------------------
# Output locations for the four checkpoint files.
ckpt_gcn = "../models/gcn_state.pt"
ckpt_pred = "../models/predictor_state.pt"
ckpt_meta = "../models/meta.pt"
ckpt_graph = "../models/graph_artifacts.pt"
os.makedirs("../models", exist_ok=True)

# Learned weights of the encoder and the predictor.
torch.save(gcn.state_dict(), ckpt_gcn)
torch.save(predictor.state_dict(), ckpt_pred)

# Constructor arguments stored next to the weights; the values mirror
# the ones used when the two modules were built.
meta = dict(
    num_features=dataset.num_features,
    gcn_hidden=64,
    gcn_out=32,
    pred_hidden=64,
)
torch.save(meta, ckpt_meta)

# Graph tensors the encoder was run on, moved to the CPU before saving.
graph_artifacts = dict(
    x=data.x.cpu(),
    train_pos_edge_index=data.train_pos_edge_index.cpu(),
)
torch.save(graph_artifacts, ckpt_graph)

print("\n")

# -----------------------------
# 7. Métricas adicionais 
# -----------------------------
def _split_metrics(predictor, z, pos_edge_index, neg_edge_index, threshold):
    """Return ``(accuracy, precision, recall, f1)`` for one edge split.

    Args:
        predictor: Pair scorer called as ``predictor(z_src, z_dst)``.
        z: Node embeddings indexed by the node ids in the edge indices.
        pos_edge_index: Edges labelled 1.
        neg_edge_index: Node pairs labelled 0.
        threshold: Scores ``>= threshold`` are predicted as class 1.
    """
    pos = predictor(z[pos_edge_index[0]], z[pos_edge_index[1]]).cpu().numpy()
    neg = predictor(z[neg_edge_index[0]], z[neg_edge_index[1]]).cpu().numpy()
    probs = np.concatenate([pos, neg], axis=0)
    labels = np.concatenate([np.ones_like(pos), np.zeros_like(neg)], axis=0)
    preds = (probs >= threshold).astype(int)
    return (
        accuracy_score(labels, preds),
        # zero_division=0 yields 0 instead of a warning when a metric's
        # denominator is empty.
        precision_score(labels, preds, zero_division=0),
        recall_score(labels, preds, zero_division=0),
        f1_score(labels, preds, zero_division=0),
    )


# Single source of truth for the decision threshold: it is used for both
# splits and is also the value recorded in the JSON report.
threshold = 0.5

gcn.eval()
predictor.eval()
with torch.no_grad():
    # Embeddings come from the training edges only, as in test().
    z = gcn(data.x, data.train_pos_edge_index)

    # Val
    val_acc, val_prec, val_rec, val_f1 = _split_metrics(
        predictor, z, data.val_pos_edge_index, data.val_neg_edge_index, threshold
    )

    # Test
    test_acc, test_prec, test_rec, test_f1 = _split_metrics(
        predictor, z, data.test_pos_edge_index, data.test_neg_edge_index, threshold
    )

# Build the report dictionary and save it as JSON; float() converts the
# scikit-learn results to plain Python floats for json.dump.
results = {
    "threshold": threshold,
    "val": {
        "accuracy": float(val_acc),
        "precision": float(val_prec),
        "recall": float(val_rec),
        "f1": float(val_f1),
    },
    "test": {
        "accuracy": float(test_acc),
        "precision": float(test_prec),
        "recall": float(test_rec),
        "f1": float(test_f1),
    },
}

os.makedirs("../results", exist_ok=True)
with open("../results/result.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=2)

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!


Epoch 010, Loss: 0.6276, Val AUC: 0.6331, Test AUC: 0.6202
Epoch 020, Loss: 0.5101, Val AUC: 0.7674, Test AUC: 0.7418
Epoch 030, Loss: 0.3537, Val AUC: 0.8298, Test AUC: 0.8221
Epoch 040, Loss: 0.2803, Val AUC: 0.8417, Test AUC: 0.8514
Epoch 050, Loss: 0.2376, Val AUC: 0.8364, Test AUC: 0.8512
Epoch 060, Loss: 0.1986, Val AUC: 0.8508, Test AUC: 0.8616
Epoch 070, Loss: 0.1741, Val AUC: 0.8645, Test AUC: 0.8713
Epoch 080, Loss: 0.1564, Val AUC: 0.8657, Test AUC: 0.8710
Epoch 090, Loss: 0.1517, Val AUC: 0.8667, Test AUC: 0.8729
Epoch 100, Loss: 0.1491, Val AUC: 0.8648, Test AUC: 0.8705


