In [9]:
import numpy as np
import pandas as pd
import networkx as nx

import sys

sys.path.append("../")
import utils
from gcn_models import GIN, GATC, SAGE

from torch.utils.data import Dataset
from torch_geometric.utils.convert import from_networkx
from os import path
import torch
import os
import numpy as np

import torch
import torch.nn.functional as F

from torch_geometric.loader import DataLoader
from sklearn.metrics import f1_score

In [10]:
# --- Early-stopping state -------------------------------------------------
# Number of epochs without a new best test macro-F1 that the training loop
# tolerates before it breaks out.
early_stop_thresh = 25
# Best test macro-F1 seen so far; -1 is below any value f1_score can return,
# so the first evaluated epoch always counts as an improvement.
best_macro_f1 = -1

# --- Reproducibility ------------------------------------------------------
# Seed the global NumPy and torch generators BEFORE the model is built and the
# shuffling loader is created, so that weight initialisation and batch order
# are the same after every "Restart Kernel -> Run All".
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Data -----------------------------------------------------------------
# NOTE(review): the meaning of the positional `True` flag and of `n_quantiles`
# is defined in utils.GraphDataset, which is not visible here - confirm there.
train_dataset = utils.GraphDataset(
    "../data/", "MixedShapesSmallTrain_TRAIN", True, n_quantiles=100
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = utils.GraphDataset(
    "../data/", "MixedShapesSmallTrain_TEST", True, n_quantiles=100
)
# Evaluation does not need shuffling, hence the default (ordered) loader.
test_loader = DataLoader(test_dataset, batch_size=64)

# --- Model and optimisation -----------------------------------------------
# NOTE(review): presumably `1` is the node-feature width and the list gives the
# hidden layer sizes - verify against gcn_models.GIN.
model = GIN(1, [8, 16, 32, 32, 32, 16, 16], pool_method="add")

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Lower the learning rate when the monitored quantity (mode="min", i.e. a loss)
# has stopped decreasing for `patience` epochs; wait `cooldown` epochs after
# each reduction before monitoring resumes.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, patience=5, mode="min", cooldown=2
)





In [11]:
def train():
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Uses the module-level ``model``, ``train_loader`` and ``optimizer``.
    The classification targets are ``data.y - 1`` (presumably the stored
    labels are 1-based - confirm against the dataset).

    Returns:
        float: the per-batch mean cross-entropy, weighted by the number of
        graphs in each batch and divided by ``len(train_loader.dataset)``.
    """
    model.train()

    weighted_loss_sum = 0
    for batch in train_loader:
        optimizer.zero_grad()
        # Node features are cast to float32 before being fed to the network.
        logits = model(batch.x.to(torch.float32), batch.edge_index, batch.batch)
        batch_loss = F.cross_entropy(logits, batch.y - 1)
        batch_loss.backward()
        optimizer.step()
        # cross_entropy averages over the batch, so scale back by the batch
        # size to make the final division a true per-graph mean.
        weighted_loss_sum += batch_loss.item() * batch.num_graphs

    dataset_size = len(train_loader.dataset)
    return weighted_loss_sum / dataset_size


@torch.no_grad()
def test(loader, return_loss=True):
    model.eval()
    y_pred = []
    y_true = []
    loss = 0
    for data in loader:
        y_out = model(data.x.to(torch.float32), data.edge_index, data.batch)
        y_pred.append(y_out.argmax(dim=-1))
        y_true.append(data.y - 1)
        if return_loss:
            loss += float(F.cross_entropy(y_out, data.y - 1)) * data.num_graphs
    y_pred = np.concatenate(y_pred)
    y_true = np.concatenate(y_true)
    if return_loss:
        return f1_score(y_true=y_true, y_pred=y_pred, average="macro"), loss / len(
            loader.dataset
        )
    return f1_score(y_true=y_true, y_pred=y_pred, average="macro")

In [12]:
# Train for up to 500 epochs, keep the checkpoint with the best test macro-F1,
# and stop early once `early_stop_thresh` epochs pass without a new best.
#
# NOTE(review): both checkpoint selection and early stopping use the TEST
# split, so the reported best test score is optimistically biased; consider
# carving out a validation split for model selection.

# Epoch of the last improvement. Initialised here so the early-stopping check
# below never reads an undefined (or stale, when this cell is re-run) value.
best_epoch = 0

for epoch in range(1, 501):
    train()
    # Re-evaluate on the training data in eval mode so the train and test
    # metrics are computed the same way.
    train_macro_f1, train_loss = test(train_loader)
    test_macro_f1, test_loss = test(test_loader, True)
    # The plateau scheduler monitors the training loss.
    scheduler.step(train_loss)
    print(
        f"Epoch: {epoch:03d}, Train_Loss: {train_loss:02.4f}, Train: {train_macro_f1:01.4f}, Test_Loss: {test_loss:02.4f}, Test: {test_macro_f1:01.4f}"
    )
    if test_macro_f1 > best_macro_f1:
        # Record the new best score. The original assigned to an unrelated
        # name (`best_accuracy`), which left `best_macro_f1` at its initial
        # value: every epoch then overwrote the checkpoint and early stopping
        # could never trigger.
        best_macro_f1 = test_macro_f1
        best_epoch = epoch
        torch.save(model.state_dict(), "../data/best_model.pth")
    elif epoch - best_epoch > early_stop_thresh:
        print(
            f"Early stopped training at epoch {epoch} best macro F1 {best_macro_f1} in {best_epoch}"
        )
        break

Epoch: 001, Train_Loss: 20.3010, Train: 0.2076, Test_Loss: 19.4301, Test: 0.1978
Epoch: 002, Train_Loss: 35.5605, Train: 0.1841, Test_Loss: 25.6824, Test: 0.1920
Epoch: 003, Train_Loss: 22.1005, Train: 0.2122, Test_Loss: 15.6819, Test: 0.1865
Epoch: 004, Train_Loss: 3.8044, Train: 0.2164, Test_Loss: 3.8729, Test: 0.2165
Epoch: 005, Train_Loss: 4.2472, Train: 0.1828, Test_Loss: 4.2155, Test: 0.1753
Epoch: 006, Train_Loss: 2.5106, Train: 0.1581, Test_Loss: 2.6406, Test: 0.1561
Epoch: 007, Train_Loss: 1.9503, Train: 0.2080, Test_Loss: 1.8451, Test: 0.1683
Epoch: 008, Train_Loss: 1.7603, Train: 0.0667, Test_Loss: 1.6410, Test: 0.0799
Epoch: 009, Train_Loss: 1.7844, Train: 0.0667, Test_Loss: 1.6603, Test: 0.0782
Epoch: 010, Train_Loss: 1.6448, Train: 0.1819, Test_Loss: 1.6096, Test: 0.1603
Epoch: 011, Train_Loss: 1.6011, Train: 0.1984, Test_Loss: 1.5907, Test: 0.1620
Epoch: 012, Train_Loss: 1.5937, Train: 0.1978, Test_Loss: 1.5828, Test: 0.1741
Epoch: 013, Train_Loss: 1.5781, Train: 0.2124,

KeyboardInterrupt: 