In [None]:
import random
import numpy as np
import pandas as pd

import torch
import torch.nn.functional as F
from torch_geometric.datasets import ZINC
from torch_geometric.loader import DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool

In [None]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same value is handed, in order, to Python's ``random`` module,
    NumPy's legacy global generator, PyTorch's default generator and the
    CUDA generators (current device, then all devices).

    Args:
        seed: Seed value passed unchanged to each seeding function.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Use the GPU when CUDA reports itself as available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
# Build the train / val / test splits of ZINC (subset=True), all rooted at data/ZINC.
train_dataset, val_dataset, test_dataset = (
    ZINC(root='data/ZINC', subset=True, split=split_name)
    for split_name in ('train', 'val', 'test')
)

# Only the training loader shuffles; the evaluation loaders keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_split, batch_size=32, shuffle=False)
    for eval_split in (val_dataset, test_dataset)
)

# Report how many samples each split contains.
print("Tamanho dos datasets:")
print("\n".join(
    f"{label}: {len(split)} amostras"
    for label, split in (
        ("Treino", train_dataset),
        ("Validação", val_dataset),
        ("Teste", test_dataset),
    )
))

In [None]:
class GCNGraph(torch.nn.Module):
    """Graph-level model: a stack of GCN layers, mean pooling, linear read-out.

    Every convolution is followed by a ReLU (including the last one), the
    resulting node embeddings are pooled with ``global_mean_pool`` using the
    ``batch`` vector, and a final linear layer produces the output.

    Args:
        in_channels: Width of the input node features.
        hidden_channels: Output width of every GCN layer.
        out_channels: Width of the final linear layer's output.
        num_layers: Number of GCN layers to stack; must be at least 1.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, in_channels, hidden_channels, out_channels, num_layers=10):
        super().__init__()

        # Building "first + (num_layers - 2) middle + last" would silently create
        # two layers for num_layers < 2; reject invalid depths and build exactly
        # num_layers convolutions instead.
        if num_layers < 1:
            raise ValueError(f"num_layers must be >= 1, got {num_layers}")

        self.num_layers = num_layers

        # Only the first layer consumes the raw feature width; every later layer
        # maps hidden -> hidden. Layers are created in order, so for
        # num_layers >= 2 the module names (convs.0 ... convs.N-1) and the
        # parameter creation order match the previous construction.
        input_widths = [in_channels] + [hidden_channels] * (num_layers - 1)
        self.convs = torch.nn.ModuleList(
            [GCNConv(width, hidden_channels) for width in input_widths]
        )

        self.lin = torch.nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        """Run the network and return one output row per pooled group.

        Args:
            x: Node feature tensor fed to the first GCN layer.
            edge_index: Edge index tensor forwarded to every GCN layer.
            batch: Assignment vector handed to ``global_mean_pool``
                (presumably maps each node to its graph — confirm against
                the loader in use).

        Returns:
            Output of the linear read-out applied to the pooled embeddings.
        """
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.relu(x)

        x = global_mean_pool(x, batch)

        return self.lin(x)


In [None]:
def train_epoch(model, loader, optimizer, loss_fn, device):
    """Train ``model`` for one pass over ``loader`` and return the mean loss.

    The returned value is a sample-weighted mean: each batch loss is weighted
    by the number of targets in that batch, so a smaller final batch does not
    count as much as a full one. This assumes ``loss_fn`` returns a mean over
    the batch elements (the default reduction of ``torch.nn.L1Loss``).

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``to``, ``x``, ``edge_index``,
            ``batch`` and ``y``.
        optimizer: Optimizer stepping ``model``'s parameters.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Mean training loss per target over the whole pass, as a float.

    Raises:
        ValueError: If ``loader`` yields no targets at all.
    """
    model.train()
    loss_sum = 0.0
    num_targets = 0

    for batch in loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        # Flatten predictions and targets to 1-D so their shapes match in the loss.
        out = model(batch.x.float(), batch.edge_index, batch.batch).view(-1)
        target = batch.y.view(-1).float()

        loss = loss_fn(out, target)
        loss.backward()
        optimizer.step()

        # Undo the per-batch mean so that batches of different size are
        # weighted by how many targets they actually contain.
        batch_targets = target.numel()
        loss_sum += loss.item() * batch_targets
        num_targets += batch_targets

    if num_targets == 0:
        raise ValueError("loader yielded no samples; cannot compute a mean loss")

    return loss_sum / num_targets


In [None]:
@torch.no_grad()
def eval_epoch(model, loader, loss_fn, device):
    """Evaluate ``model`` over ``loader`` and return the mean loss per target.

    Runs with gradients disabled and the model in eval mode. Each batch loss
    is weighted by the number of targets in that batch, so the result is the
    per-sample mean over the whole loader rather than a mean of batch means
    (which would over-weight a smaller final batch). This assumes ``loss_fn``
    returns a mean over the batch elements (the default reduction of
    ``torch.nn.L1Loss``).

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``to``, ``x``, ``edge_index``,
            ``batch`` and ``y``.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        device: Device every batch is moved to before the forward pass.

    Returns:
        Mean loss per target over the whole loader, as a float.

    Raises:
        ValueError: If ``loader`` yields no targets at all.
    """
    model.eval()
    loss_sum = 0.0
    num_targets = 0

    for batch in loader:
        batch = batch.to(device)

        # Flatten predictions and targets to 1-D so their shapes match in the loss.
        out = model(batch.x.float(), batch.edge_index, batch.batch).view(-1)
        target = batch.y.view(-1).float()

        # Undo the per-batch mean so every target contributes equally.
        batch_targets = target.numel()
        loss_sum += loss_fn(out, target).item() * batch_targets
        num_targets += batch_targets

    if num_targets == 0:
        raise ValueError("loader yielded no samples; cannot compute a mean loss")

    return loss_sum / num_targets


In [None]:
def train_gcn_regression(
    model,
    train_loader,
    val_loader,
    test_loader,
    device,
    epochs=50,
    lr=1e-3
):
    """Train ``model`` with an L1 loss and Adam, logging per-epoch metrics.

    After every epoch the model is evaluated on ``val_loader``; once training
    finishes, the final model is evaluated once on ``test_loader``.

    Args:
        model: Module to train; it is moved to ``device`` first.
        train_loader: Loader passed to ``train_epoch`` each epoch.
        val_loader: Loader passed to ``eval_epoch`` each epoch.
        test_loader: Loader passed to ``eval_epoch`` once after the last epoch.
        device: Device used for the model and every batch.
        epochs: Number of training epochs; must be at least 1.
        lr: Learning rate handed to ``torch.optim.Adam``.

    Returns:
        Dict of equal-length lists keyed ``"epoch"``, ``"train_loss"``,
        ``"val_mae"`` and ``"test_mae"``. ``"test_mae"`` is ``None`` for every
        epoch except the last, which holds the test result.

    Raises:
        ValueError: If ``epochs`` is smaller than 1.
    """
    # With fewer than one epoch the per-epoch lists stay empty while
    # "test_mae" would still get one entry, giving columns of unequal length
    # that cannot be turned into a DataFrame. Fail early instead.
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    model = model.to(device)

    loss_fn = torch.nn.L1Loss()  # MAE
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    history = {
        "epoch": [],
        "train_loss": [],
        "val_mae": []
    }

    for epoch in range(epochs):
        train_loss = train_epoch(
            model, train_loader, optimizer, loss_fn, device
        )
        val_mae = eval_epoch(
            model, val_loader, loss_fn, device
        )

        # Epochs are reported 1-based in both the history and the log line.
        history["epoch"].append(epoch + 1)
        history["train_loss"].append(train_loss)
        history["val_mae"].append(val_mae)

        print(
            f"Epoch {epoch+1:03d} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val MAE: {val_mae:.4f}"
        )

    test_mae = eval_epoch(model, test_loader, loss_fn, device)
    print(f"Test MAE: {test_mae:.4f}")

    # Pad with None so the column lines up with the per-epoch lists; the
    # single test score sits on the last epoch's row.
    history["test_mae"] = [None] * (epochs - 1) + [test_mae]

    return history


In [None]:
# The device does not depend on the seed, so pick it once for all runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Repeat the full training run once per seed and store each run's metrics.
for seed in [42, 7, 5, 9]:
    # Seed before building the model so weight initialisation is reproducible.
    set_seed(seed)
    model = GCNGraph(
        in_channels=train_dataset.num_features,
        hidden_channels=16,
        out_channels=1
    )

    history = train_gcn_regression(
        model=model,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        device=device,
        epochs=50,
        lr=1e-3
    )

    df = pd.DataFrame(history)
    # Put the seed before the extension so the output is a real ".csv" file
    # (the name used to end in ".csv_<seed>").
    df.to_csv(f"training_metrics_{seed}.csv", index=False)
