In [10]:
import os.path as osp
import time
import torch
import torch.nn.functional as F
from sklearn.linear_model import LogisticRegression

from torch_geometric.datasets import Planetoid
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.nn import GraphSAGE

import importlib

import training
importlib.reload(training)

<module 'training' from 'c:\\Users\\csaba\\Documents\\Coding\\git_own\\thesis_coding\\training.py'>

In [11]:
dataset,data = training.load_dataset()

In [12]:
# Link prediction loader (self-supervised)
train_loader = LinkNeighborLoader(
    data,
    batch_size=256,
    shuffle=True,
    neg_sampling_ratio=1.0,
    num_neighbors=[10, 10],
)

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device, 'x', 'edge_index')

# GraphSAGE encoder (no classification head)
model = GraphSAGE(
    in_channels=dataset.num_features,
    hidden_channels=64,
    num_layers=2,
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Self-supervised training via link prediction
def train():
    model.train()
    total_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        h = model(batch.x, batch.edge_index)
        h_src = h[batch.edge_label_index[0]]
        h_dst = h[batch.edge_label_index[1]]
        pred = (h_src * h_dst).sum(dim=-1)
        loss = F.binary_cross_entropy_with_logits(pred, batch.edge_label)
        loss.backward()
        optimizer.step()
        total_loss += float(loss) * pred.size(0)
    return total_loss / data.num_nodes

# Evaluation: use learned embeddings for node classification
@torch.no_grad()
def test():
    model.eval()
    h = model(data.x, data.edge_index).cpu().numpy()
    y = data.y.cpu().numpy()
    clf = LogisticRegression(max_iter=5000)
    clf.fit(h[data.train_mask.cpu().numpy()], y[data.train_mask.cpu().numpy()])
    val_acc = clf.score(h[data.val_mask.cpu().numpy()], y[data.val_mask.cpu().numpy()])
    test_acc = clf.score(h[data.test_mask.cpu().numpy()], y[data.test_mask.cpu().numpy()])
    return float(val_acc), float(test_acc)

# Training loop
times = []
for epoch in range(1, 51):
    start = time.time()
    loss = train()
    val_acc, test_acc = test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_acc:.4f}, Test: {test_acc:.4f}')
    times.append(time.time() - start)
print(f"Median time per epoch: {torch.tensor(times).median():.4f}s")


Epoch: 001, Loss: 4.7273, Val: 0.4780, Test: 0.5090
Epoch: 002, Loss: 4.1412, Val: 0.5000, Test: 0.5570
Epoch: 003, Loss: 3.9487, Val: 0.5760, Test: 0.6000
Epoch: 004, Loss: 3.8380, Val: 0.5360, Test: 0.5920
Epoch: 005, Loss: 3.8187, Val: 0.5540, Test: 0.6000
Epoch: 006, Loss: 3.7917, Val: 0.5600, Test: 0.5780
Epoch: 007, Loss: 3.7354, Val: 0.5460, Test: 0.5810
Epoch: 008, Loss: 3.7152, Val: 0.5640, Test: 0.5910
Epoch: 009, Loss: 3.7440, Val: 0.5580, Test: 0.5770
Epoch: 010, Loss: 3.7595, Val: 0.6100, Test: 0.6140
Epoch: 011, Loss: 3.7417, Val: 0.5800, Test: 0.6150
Epoch: 012, Loss: 3.7492, Val: 0.5760, Test: 0.5960
Epoch: 013, Loss: 3.7363, Val: 0.5920, Test: 0.5930
Epoch: 014, Loss: 3.7140, Val: 0.5760, Test: 0.5800
Epoch: 015, Loss: 3.7112, Val: 0.5940, Test: 0.5780
Epoch: 016, Loss: 3.6711, Val: 0.5880, Test: 0.5940
Epoch: 017, Loss: 3.6874, Val: 0.5520, Test: 0.5710
Epoch: 018, Loss: 3.7268, Val: 0.5720, Test: 0.5850
Epoch: 019, Loss: 3.7358, Val: 0.5680, Test: 0.5650
Epoch: 020, 