<a href="https://colab.research.google.com/github/fressbish/Koptev-Dmitry/blob/main/task4_gnn_KoptevDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Коптев Д.А., GNN, Задание 4

# Задача 4. Graph Attention Network

- Найти графовый набор данных для решения задачи предсказания (классификация вершин, обнаружение сообществ и т.д.).
- Использовать несколько слоев GATConv из библиотеки PyG для построения GAT модели.
- Обучить полученную модель, подобрать гиперпараметры (например, learning rate) на валидационной выборке, и оценить качество предсказания на тестовой выборке.
- (+5 баллов) Также найти набор данных, с помощью которого можно получить гетерогенный граф. Использовать несколько слоев RGATConv из библиотеки PyG для построения Relational GAT модели. Разбить данные на тренировочные/валидационные/тестовые с помощью средств PyG, например RandomLinkSplit для задачи Link Prediction. Обучить полученную модель, подобрать гиперпараметры (например, learning rate, multi-head attention) на валидационной выборке, и оценить качество предсказания на тестовой выборке.

In [1]:
# !pip install torch-geometric

In [2]:
import torch
from torch_geometric.datasets import Planetoid
from torch_geometric.transforms import NormalizeFeatures
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.loader import DataLoader
from torch_geometric.nn import RGATConv
import torch.nn.functional as F
from torch.nn.functional import binary_cross_entropy_with_logits

from torch.nn import Linear
from torch_geometric.nn import GATConv

from torch_geometric.loader import DataLoader
from torch.optim import Adam
from torch.nn.functional import cross_entropy

from tqdm import tqdm
from itertools import product
from sklearn.metrics import roc_auc_score, average_precision_score

In [3]:
dataset = Planetoid(root='data/Planetoid', name='Cora', transform=NormalizeFeatures())
graph_data = dataset[0]

# разбиваем на тренировочную, валидационную и тестовую выборки
total_nodes = graph_data.num_nodes
train_split = int(0.6 * total_nodes)
val_split = int(0.8 * total_nodes)

graph_data.train_mask = torch.zeros(total_nodes, dtype=torch.bool)
graph_data.train_mask[:train_split] = True

graph_data.val_mask = torch.zeros(total_nodes, dtype=torch.bool)
graph_data.val_mask[train_split:val_split] = True

graph_data.test_mask = torch.zeros(total_nodes, dtype=torch.bool)
graph_data.test_mask[val_split:] = True

total_nodes, graph_data.num_edges

(2708, 10556)

### GAT-модель

In [4]:
class GAT(torch.nn.Module):
    def __init__(self, in_features, hidden_dim, output_dim, num_heads=1):
        super().__init__()
        # первый слой GAT
        self.gat1 = GATConv(in_features, hidden_dim, heads=num_heads)
        # второй слой GAT
        self.gat2 = GATConv(hidden_dim * num_heads, output_dim, heads=1)
        # финальный классификатор
        self.fc = Linear(output_dim, dataset.num_classes)

    def forward(self, features, edges):
        h = self.gat1(features, edges).relu()  # активация ReLU
        h = self.gat2(h, edges).relu()  # второй слой
        logits = self.fc(h)  # предсказания
        return logits

In [5]:
# тренировочная функция
def train(model, data, optimizer, loss_fn):
    model.train()
    optimizer.zero_grad()
    predictions = model(data.x, data.edge_index)
    loss = loss_fn(predictions[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

# функция для оценки точности
def accuracy(model, data, mask):
    model.eval()
    predictions = model(data.x, data.edge_index).argmax(dim=1)
    correct = (predictions[mask] == data.y[mask]).sum()
    return int(correct) / int(mask.sum())

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
graph_data = graph_data.to(device)

In [7]:
# гиперпараметры
hidden_dims = [8, 16, 32]
learning_rates = [0.1, 0.05, 0.01]

best_config = None
best_val_acc = 0

for dim in hidden_dims:
    for lr in learning_rates:
        # создаем новую модель
        model = GAT(graph_data.num_features, dim, dim).to(device)
        optimizer = Adam(model.parameters(), lr=lr)
        criterion = torch.nn.CrossEntropyLoss()

        # обучение
        for epoch in tqdm(range(1, 101)):
            train_loss = train(model, graph_data, optimizer, criterion)

        # проверка на валидации
        val_acc = accuracy(model, graph_data, graph_data.val_mask)
        print(f"Hidden Dim: {dim}, LR: {lr}, Val Accuracy: {val_acc:.4f}")

        # сохраняем лучшую конфигурацию
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_config = {"hidden_dim": dim, "lr": lr}

100%|██████████| 100/100 [00:05<00:00, 18.56it/s]


Hidden Dim: 8, LR: 0.1, Val Accuracy: 0.7897


100%|██████████| 100/100 [00:07<00:00, 13.24it/s]


Hidden Dim: 8, LR: 0.05, Val Accuracy: 0.7768


100%|██████████| 100/100 [00:02<00:00, 34.19it/s]


Hidden Dim: 8, LR: 0.01, Val Accuracy: 0.8118


100%|██████████| 100/100 [00:03<00:00, 29.73it/s]


Hidden Dim: 16, LR: 0.1, Val Accuracy: 0.7970


100%|██████████| 100/100 [00:03<00:00, 25.58it/s]


Hidden Dim: 16, LR: 0.05, Val Accuracy: 0.8118


100%|██████████| 100/100 [00:03<00:00, 31.36it/s]


Hidden Dim: 16, LR: 0.01, Val Accuracy: 0.8303


100%|██████████| 100/100 [00:03<00:00, 27.23it/s]


Hidden Dim: 32, LR: 0.1, Val Accuracy: 0.8063


100%|██████████| 100/100 [00:04<00:00, 24.94it/s]


Hidden Dim: 32, LR: 0.05, Val Accuracy: 0.8192


100%|██████████| 100/100 [00:05<00:00, 19.84it/s]

Hidden Dim: 32, LR: 0.01, Val Accuracy: 0.8339





In [8]:
# обучим модель
print(best_config)
best_dim = best_config["hidden_dim"]
best_lr = best_config["lr"]

model = GAT(graph_data.num_features, best_dim, best_dim).to(device)
optimizer = Adam(model.parameters(), lr=best_lr)
criterion = torch.nn.CrossEntropyLoss()

{'hidden_dim': 32, 'lr': 0.01}


In [9]:
max_epochs = 100
print("\nTraining final model...")
for epoch in range(max_epochs):
    loss = train(model, graph_data, optimizer, criterion)
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, Loss: {loss:.4f}")


Training final model...
Epoch: 0, Loss: 1.9894
Epoch: 10, Loss: 1.7795
Epoch: 20, Loss: 1.5304
Epoch: 30, Loss: 1.0829
Epoch: 40, Loss: 0.7330
Epoch: 50, Loss: 0.4613
Epoch: 60, Loss: 0.2686
Epoch: 70, Loss: 0.1633
Epoch: 80, Loss: 0.1036
Epoch: 90, Loss: 0.0656


In [10]:
accuracy(model, graph_data, graph_data.val_mask), accuracy(model, graph_data, graph_data.test_mask)

(0.8081180811808119, 0.8634686346863468)

Получили неплохое качество, 81% на валидационных данных и 86% на тестовых данных

## 2 часть. Relational GAT

In [11]:
dataset = Planetoid(root='data/Planetoid', name='Citeseer')
graph_data = dataset[0]

In [12]:
# разбиваем данные на train/val/test
splitter = RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    is_undirected=True,
    add_negative_train_samples=True,
)
train_graph, val_graph, test_graph = splitter(graph_data)

print("Train data:", train_graph)
print("Validation data:", val_graph)
print("Test data:", test_graph)

Train data: Data(x=[3327, 3703], edge_index=[2, 7284], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327], edge_label=[7284], edge_label_index=[2, 7284])
Validation data: Data(x=[3327, 3703], edge_index=[2, 7284], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327], edge_label=[910], edge_label_index=[2, 910])
Test data: Data(x=[3327, 3703], edge_index=[2, 8194], y=[3327], train_mask=[3327], val_mask=[3327], test_mask=[3327], edge_label=[910], edge_label_index=[2, 910])


## Строим модель

In [13]:
class RelGAT(torch.nn.Module):
    def __init__(self, in_channels, hidden_dim, out_dim, num_rels):
        super().__init__()
        self.rgat1 = RGATConv(in_channels, hidden_dim, num_relations=num_rels)
        self.rgat2 = RGATConv(hidden_dim, out_dim, num_relations=num_rels)
        self.predictor = torch.nn.Linear(out_dim, 1)

    def forward(self, x, edge_index, edge_types):
        x = torch.relu(self.rgat1(x, edge_index, edge_types))
        x = self.rgat2(x, edge_index, edge_types)
        return self.predictor(x)

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
train_graph = train_graph.to(device)
val_graph = val_graph.to(device)
test_graph = test_graph.to(device)

In [16]:
# гиперпараметры
hidden_dims = [8, 16, 32]
learning_rates = [0.1, 0.05, 0.01]
results = []

for hidden_dim, learning_rate in product(hidden_dims, learning_rates):
    print(f"hidden_dim={hidden_dim}, learning_rate={learning_rate}")
    model = RelGAT(
        in_channels=graph_data.num_features,
        hidden_dim=hidden_dim,
        out_dim=16,
        num_rels=1,  # для сiteseer используем 1 тип ребер
    ).to(device)

    # настройка оптимизатора и функции потерь
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = torch.nn.BCEWithLogitsLoss()

    # обучение
    model.train()
    for epoch in tqdm(range(1, 11)): # уменьшим кол-во эпох, иначе слишком долго идет обучение
        optimizer.zero_grad()
        # Forward pass
        pred = model(
            train_graph.x,
            train_graph.edge_index,
            torch.zeros(train_graph.edge_index.size(1), dtype=torch.long).to(device),
        )
        pred = pred[train_graph.edge_label_index[0]].squeeze()
        loss = criterion(pred, train_graph.edge_label.float())
        loss.backward()
        optimizer.step()

    # Оценка на тестовом наборе
    model.eval()
    with torch.no_grad():
        test_pred = model(
            test_graph.x,
            test_graph.edge_index,
            torch.zeros(test_graph.edge_index.size(1), dtype=torch.long).to(device),
        )
        test_pred = test_pred[test_graph.edge_label_index[0]].squeeze().cpu().numpy()
        test_labels = test_graph.edge_label.cpu().numpy()

    # считаем качество
    auc_score = roc_auc_score(test_labels, test_pred)
    avg_precision = average_precision_score(test_labels, test_pred)
    print(f"Results: Hidden_dim={hidden_dim}, LR={learning_rate}, AUC-ROC={auc_score:.4f}, AP={avg_precision:.4f}")

    results.append({
        "hidden_dim": hidden_dim,
        "learning_rate": learning_rate,
        "auc_score": auc_score,
        "avg_precision": avg_precision,
    })

hidden_dim=8, learning_rate=0.1


100%|██████████| 10/10 [00:51<00:00,  5.11s/it]


Results: Hidden_dim=8, LR=0.1, AUC-ROC=0.5177, AP=0.5285
hidden_dim=8, learning_rate=0.05


100%|██████████| 10/10 [00:36<00:00,  3.63s/it]


Results: Hidden_dim=8, LR=0.05, AUC-ROC=0.5993, AP=0.6235
hidden_dim=8, learning_rate=0.01


100%|██████████| 10/10 [00:30<00:00,  3.03s/it]


Results: Hidden_dim=8, LR=0.01, AUC-ROC=0.6093, AP=0.6390
hidden_dim=16, learning_rate=0.1


100%|██████████| 10/10 [00:58<00:00,  5.86s/it]


Results: Hidden_dim=16, LR=0.1, AUC-ROC=0.5671, AP=0.5678
hidden_dim=16, learning_rate=0.05


100%|██████████| 10/10 [01:09<00:00,  6.96s/it]


Results: Hidden_dim=16, LR=0.05, AUC-ROC=0.5855, AP=0.6212
hidden_dim=16, learning_rate=0.01


100%|██████████| 10/10 [00:58<00:00,  5.88s/it]


Results: Hidden_dim=16, LR=0.01, AUC-ROC=0.6235, AP=0.6247
hidden_dim=32, learning_rate=0.1


100%|██████████| 10/10 [01:49<00:00, 10.99s/it]


Results: Hidden_dim=32, LR=0.1, AUC-ROC=0.5819, AP=0.5524
hidden_dim=32, learning_rate=0.05


100%|██████████| 10/10 [01:47<00:00, 10.77s/it]


Results: Hidden_dim=32, LR=0.05, AUC-ROC=0.6212, AP=0.6264
hidden_dim=32, learning_rate=0.01


100%|██████████| 10/10 [01:49<00:00, 10.99s/it]


Results: Hidden_dim=32, LR=0.01, AUC-ROC=0.6294, AP=0.6294


In [17]:
# выбираем лучшую конфигурацию
best_config = max(results, key=lambda x: x["auc_score"])
best_config

{'hidden_dim': 32,
 'learning_rate': 0.01,
 'auc_score': 0.6294070764400435,
 'avg_precision': 0.6294409877745013}

In [18]:
# финальное обучение на лучших параметрах
best_model = RelGAT(
    in_channels=graph_data.num_features,
    hidden_dim=best_config["hidden_dim"],
    out_dim=16,
    num_rels=1,
).to(device)

In [19]:
optimizer = torch.optim.Adam(best_model.parameters(), lr=best_config["learning_rate"])
criterion = torch.nn.BCEWithLogitsLoss()

best_model.train()
for epoch in tqdm(range(1, 101)):
    optimizer.zero_grad()
    pred = best_model(
        train_graph.x,
        train_graph.edge_index,
        torch.zeros(train_graph.edge_index.size(1), dtype=torch.long).to(device),
    )
    pred = pred[train_graph.edge_label_index[0]].squeeze()
    loss = criterion(pred, train_graph.edge_label.float())
    loss.backward()
    optimizer.step()

100%|██████████| 100/100 [18:20<00:00, 11.01s/it]


In [20]:
best_model.eval()
with torch.no_grad():
    test_pred = best_model(
        test_graph.x,
        test_graph.edge_index,
        torch.zeros(test_graph.edge_index.size(1), dtype=torch.long).to(device),
    )
    test_pred = test_pred[test_graph.edge_label_index[0]].squeeze().cpu().numpy()
    test_labels = test_graph.edge_label.cpu().numpy()

roc_auc = roc_auc_score(test_labels, test_pred)
avg_precision = average_precision_score(test_labels, test_pred)

roc_auc, avg_precision

(0.5744547759932375, 0.5671208788673803)