In [53]:
from torch_geometric.datasets import ZINC
from torch_geometric.loader import DataLoader
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool
import random
import numpy as np

In [54]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [55]:
train_dataset = ZINC(root='data/ZINC', subset=True, split='train')
val_dataset = ZINC(root='data/ZINC', subset=True, split='val')
test_dataset = ZINC(root='data/ZINC', subset=True, split='test')

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

print(f"Tamanho dos datasets:")
print(f"Treino: {len(train_dataset)} amostras")
print(f"Validação: {len(val_dataset)} amostras")
print(f"Teste: {len(test_dataset)} amostras")

Tamanho dos datasets:
Treino: 10000 amostras
Validação: 1000 amostras
Teste: 1000 amostras


In [56]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GATConv, global_mean_pool
from torch import nn

class GATGraph(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.heads = 8
        self.convs = torch.nn.ModuleList()

        # Criando 10 camadas de GATConv
        for i in range(10):
            in_feats = in_channels if i == 0 else hidden_channels
            out_feats = hidden_channels // self.heads if i < 9 else hidden_channels  # Última camada não terá divisão de cabeças
            self.convs.append(GATConv(in_feats, out_feats, heads=self.heads, concat=True if i < 9 else False))

        self.lin = nn.Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index, batch):
        for conv in self.convs:
            x = conv(x, edge_index)
            x = F.leaky_relu(x)

        x = global_mean_pool(x, batch)
        return self.lin(x)


In [57]:
def train_epoch(model, loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0.0

    for batch in loader:
        batch = batch.to(device)

        optimizer.zero_grad()
        out = model(batch.x.float(), batch.edge_index, batch.batch)
        out = out.view(-1)                # [B]
        target = batch.y.view(-1).float() # [B]

        loss = loss_fn(out, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(loader)


In [58]:
@torch.no_grad()
def eval_epoch(model, loader, loss_fn, device):
    model.eval()
    total_loss = 0.0

    for batch in loader:
        batch = batch.to(device)

        out = model(batch.x.float(), batch.edge_index, batch.batch)
        out = out.view(-1)
        target = batch.y.view(-1).float()

        loss = loss_fn(out, target)
        total_loss += loss.item()

    return total_loss / len(loader)


In [59]:
def train_gat_regression(
    model,
    train_loader,
    val_loader,
    test_loader,
    device,
    epochs=50,
    lr=1e-3
):
    model = model.to(device)

    loss_fn = torch.nn.L1Loss()  # MAE
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    history = {
        "epoch": [],
        "train_loss": [],
        "val_mae": []
    }

    for epoch in range(epochs):
        train_loss = train_epoch(
            model, train_loader, optimizer, loss_fn, device
        )
        val_mae = eval_epoch(
            model, val_loader, loss_fn, device
        )

        history["epoch"].append(epoch + 1)
        history["train_loss"].append(train_loss)
        history["val_mae"].append(val_mae)

        print(
            f"Epoch {epoch+1:03d} | "
            f"Train Loss: {train_loss:.4f} | "
            f"Val MAE: {val_mae:.4f}"
        )

    test_mae = eval_epoch(model, test_loader, loss_fn, device)
    print(f"Test MAE: {test_mae:.4f}")

    history["test_mae"] = [None] * (epochs - 1) + [test_mae]

    return history


In [60]:
train_dataset.num_features

1

In [61]:
import pandas as pd
for seed in [42, 7, 5, 9]:
  set_seed(seed)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model = GATGraph(
    in_channels=train_dataset.num_features,
    hidden_channels=16,
    out_channels=1
  )

  history = train_gat_regression(
      model=model,
      train_loader=train_loader,
      val_loader=val_loader,
      test_loader=test_loader,
      device=device,
      epochs=50,
      lr=1e-3
  )

  df = pd.DataFrame(history)
  df.to_csv(f"training_metrics.csv_{seed}", index=False)


Epoch 001 | Train Loss: 1.2583 | Val MAE: 1.1233
Epoch 002 | Train Loss: 1.1006 | Val MAE: 1.0579
Epoch 003 | Train Loss: 1.0483 | Val MAE: 0.9939
Epoch 004 | Train Loss: 0.9768 | Val MAE: 0.9182
Epoch 005 | Train Loss: 0.9159 | Val MAE: 0.8757
Epoch 006 | Train Loss: 0.8808 | Val MAE: 0.8735
Epoch 007 | Train Loss: 0.8567 | Val MAE: 0.8360
Epoch 008 | Train Loss: 0.8381 | Val MAE: 0.8317
Epoch 009 | Train Loss: 0.8172 | Val MAE: 0.8291
Epoch 010 | Train Loss: 0.8114 | Val MAE: 0.7990
Epoch 011 | Train Loss: 0.8033 | Val MAE: 0.8165
Epoch 012 | Train Loss: 0.8058 | Val MAE: 0.8044
Epoch 013 | Train Loss: 0.7904 | Val MAE: 0.7877
Epoch 014 | Train Loss: 0.7867 | Val MAE: 0.7952
Epoch 015 | Train Loss: 0.7796 | Val MAE: 0.7873
Epoch 016 | Train Loss: 0.7776 | Val MAE: 0.7742
Epoch 017 | Train Loss: 0.7699 | Val MAE: 0.7688
Epoch 018 | Train Loss: 0.7710 | Val MAE: 0.7658
Epoch 019 | Train Loss: 0.7611 | Val MAE: 0.7598
Epoch 020 | Train Loss: 0.7570 | Val MAE: 0.7653
Epoch 021 | Train Lo