In [None]:
import numpy as np
import pandas as pd
import torch
import os
import plotly.graph_objects as go

import dataset_loader
import utils
import config
import models

In [None]:
# Reproducibility settings and numeric defaults shared by the whole notebook.
seed = 42
default_tensor_data_type = torch.float32

# Seed both random number generators used here and fix the default dtype
# for every tensor created afterwards.
np.random.seed(seed)
torch.manual_seed(seed)
torch.set_default_dtype(default_tensor_data_type)

# Resolve the compute device once; the CUDA-specific seeding keys off it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

# Autograd anomaly detection is switched on globally for the session.
# NOTE(review): PyTorch documents this as a debugging aid that slows
# execution -- confirm it is meant to stay enabled for full runs.
torch.autograd.set_detect_anomaly(True)
print(f"Using device: {device}")

In [None]:
# Registry that resolves the "model" string of an experiment config to the
# class that gets instantiated for it.
models_mapper = dict(GraphUnet=models.GraphUnet)

# Number of training epochs run for every experiment.
num_epoch = 200

# Default Adam learning rate, and the separate rate applied to the
# parameters whose name contains "self_loops.mask".
learning_rate = 1e-2
self_loops_learning_rate = 1e-1

In [None]:
def create_directory_structure(directory_path):
    """Create ``directory_path`` (including parents) and report the outcome.

    Prints one line saying whether the directory tree was created or was
    already present. Only ``FileExistsError`` is handled; any other error
    raised by ``os.makedirs`` propagates to the caller.

    Args:
        directory_path: Path of the directory tree to create.
    """
    try:
        os.makedirs(directory_path)
    except FileExistsError:
        message = f"Directory already exists: {directory_path}"
    else:
        message = f"Directory structure created: {directory_path}"
    print(message)


def plot_and_save(fig, title, x_title, y_title, file_path):
    """Label a figure and write it to disk as an image.

    Args:
        fig: Figure object exposing ``update_layout`` and ``write_image``
            (the notebook passes ``plotly.graph_objects.Figure`` instances).
        title: Text for the figure title.
        x_title: Label for the x axis.
        y_title: Label for the y axis.
        file_path: Destination passed to ``fig.write_image``.
    """
    layout_options = {
        "title": title,
        "xaxis_title": x_title,
        "yaxis_title": y_title,
    }
    fig.update_layout(**layout_options)
    fig.write_image(file_path)


def plot_loss_accuracy(
    experiment_name,
    results_target,
    loss_train_history,
    loss_val_history,
    loss_test_history,
    acc_train_history,
    acc_val_history,
    acc_test_history
):
    """Save the loss and accuracy curves of one experiment as PNG images.

    Ensures ``results_target`` exists, then writes ``loss_plot.png`` and
    ``accuracy_plot.png`` into it. Each figure holds three line traces
    (training, validation, test) plotted against the 0-based position of
    every value in its history.

    Args:
        experiment_name: Name shown in square brackets in both plot titles.
        results_target: Directory that receives the two image files.
        loss_train_history: Per-epoch training loss values.
        loss_val_history: Per-epoch validation loss values.
        loss_test_history: Per-epoch test loss values.
        acc_train_history: Per-epoch training accuracy values.
        acc_val_history: Per-epoch validation accuracy values.
        acc_test_history: Per-epoch test accuracy values.
    """
    create_directory_structure(results_target)

    def build_line_figure(named_series):
        # One line trace per (label, values) pair, x axis = position in list.
        figure = go.Figure()
        for trace_name, values in named_series:
            figure.add_trace(go.Scatter(
                x=list(range(len(values))),
                y=values,
                mode='lines',
                name=trace_name,
            ))
        return figure

    # Loss curves
    loss_fig = build_line_figure([
        ('Training Loss', loss_train_history),
        ('Validation Loss', loss_val_history),
        ('Test Loss', loss_test_history),
    ])
    plot_and_save(
        loss_fig,
        f'[{experiment_name}] Loss',
        'Epoch',
        'Loss',
        f'{results_target}/loss_plot.png',
    )

    # Accuracy curves
    acc_fig = build_line_figure([
        ('Training Accuracy', acc_train_history),
        ('Validation Accuracy', acc_val_history),
        ('Test Accuracy', acc_test_history),
    ])
    plot_and_save(
        acc_fig,
        f'[{experiment_name}] Accuracy',
        'Epoch',
        'Accuracy',
        f'{results_target}/accuracy_plot.png',
    )


def update_csv(csv_filename, data):
    """Append one result row to a CSV summary file, creating it if needed.

    The file is read in full, the new row is added at the end, and the whole
    table is written back without the index column.

    Args:
        csv_filename: Path of the CSV file to read and rewrite.
        data: Mapping of column name to scalar value describing the new row.
    """
    # Column order used when the summary file is created from scratch.
    # (The original list lacked a comma after 'loss_val', so Python joined the
    # adjacent literals into a single bogus 'loss_valacc_train' column.)
    default_columns = [
        'Experiment Name',
        'epoch',
        'loss_train',
        'loss_test',
        'loss_val',
        'acc_train',
        'acc_test',
        'acc_val',
    ]

    new_row = pd.DataFrame(data, index=[0])

    try:
        existing = pd.read_csv(csv_filename)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Missing or zero-byte file: start from the canonical header.
        existing = pd.DataFrame(columns=default_columns)

    if existing.empty:
        # Nothing to append to. Build the table from the new row directly,
        # keeping the existing header order and adding unknown keys at the
        # end; this avoids concatenating with an empty frame, which recent
        # pandas versions deprecate.
        ordered_columns = list(existing.columns) + [
            column for column in new_row.columns
            if column not in existing.columns
        ]
        combined = new_row.reindex(columns=ordered_columns)
    else:
        combined = pd.concat([existing, new_row], ignore_index=True)

    combined.to_csv(csv_filename, index=False)

In [None]:
def run_experiment(experiment_config: dict):
    """Train one configured model full-batch and record its learning curves.

    The function loads the dataset, builds the model, trains it for
    ``num_epoch`` epochs with Adam, and tracks loss and accuracy on the
    train/validation/test masks every epoch. When training ends it writes the
    curves to ``results/<name>/`` and appends the last epoch's metrics to
    ``results/overview.csv``.

    Besides its argument it reads the module-level ``models_mapper``,
    ``device``, ``default_tensor_data_type``, ``learning_rate``,
    ``self_loops_learning_rate`` and ``num_epoch``.

    Args:
        experiment_config: Mapping with the keys ``"name"`` (used for titles
            and the output directory), ``"model"`` (key into
            ``models_mapper``), ``"dataset"`` (passed to
            ``dataset_loader.load_dataset``) and ``"model_args"`` (keyword
            arguments for the model constructor). The mapping is not
            modified.

    Raises:
        KeyError: If a required key is missing or ``"model"`` is not
            registered in ``models_mapper``.
    """
    experiment_name = experiment_config["name"]
    model_class = models_mapper[experiment_config["model"]]

    data = dataset_loader.load_dataset(experiment_config["dataset"])
    train_mask = data.train_mask
    val_mask = data.val_mask
    test_mask = data.test_mask
    num_nodes = data.x.shape[0]
    in_features = data.x.shape[1]
    output_classes = len(torch.unique(data.y))

    A = utils.create_A(data.edge_index).to(
        device=device, dtype=default_tensor_data_type)
    X = data.x.to(device=device, dtype=default_tensor_data_type)
    # NOTE(review): labels are cast to the default float dtype; presumably
    # utils.compute_loss / compute_accuracy expect that -- confirm.
    y = data.y.to(device=device, dtype=default_tensor_data_type)

    # Build a fresh kwargs dict instead of updating the config in place, so
    # the shared experiment configuration is not mutated by running it.
    model_args = {
        **experiment_config["model_args"],
        "num_nodes": num_nodes,
        "in_features": in_features,
        "output_classes": output_classes,
    }

    # Keep the model on the same device as X and A.
    model = model_class(**model_args).to(device)

    # Split parameters into two groups: the self-loop masks get their own
    # learning rate, everything else uses the default one.
    model_parameters = []
    self_loops_parameters = []
    for name, param in model.named_parameters():
        if "self_loops.mask" in name:
            self_loops_parameters.append(param)
        else:
            model_parameters.append(param)

    # Single optimizer over both groups.
    optimizer = torch.optim.Adam([
        {'params': model_parameters},
        {'params': self_loops_parameters, 'lr': self_loops_learning_rate}
    ], lr=learning_rate)

    loss_train_history = []
    loss_val_history = []
    loss_test_history = []

    acc_train_history = []
    acc_val_history = []
    acc_test_history = []

    for epoch in range(1, num_epoch+1):
        # Training step: the loss is computed on the training nodes only.
        model.train()

        y_pred = model(X, A)
        y_true_train: torch.Tensor = y[train_mask]
        y_pred_train: torch.Tensor = y_pred[train_mask]
        loss_train = utils.compute_loss(y_true_train, y_pred_train)

        acc_train = utils.compute_accuracy(y_true_train, y_pred_train)
        loss_train_history.append(loss_train.item())
        acc_train_history.append(acc_train)

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss_train.backward()
        optimizer.step()

        # Evaluation step: run a separate forward pass in eval mode without
        # gradient tracking, so validation/test metrics are not computed from
        # the training-mode output of the step above.
        model.eval()
        with torch.no_grad():
            y_pred_eval = model(X, A)

            y_true_val = y[val_mask]
            y_pred_val = y_pred_eval[val_mask]
            loss_val = utils.compute_loss(y_true_val, y_pred_val)
            acc_val = utils.compute_accuracy(y_true_val, y_pred_val)
            loss_val_history.append(loss_val.item())
            acc_val_history.append(acc_val)

            y_true_test = y[test_mask]
            y_pred_test = y_pred_eval[test_mask]
            loss_test = utils.compute_loss(y_true_test, y_pred_test)
            acc_test = utils.compute_accuracy(y_true_test, y_pred_test)
            loss_test_history.append(loss_test.item())
            acc_test_history.append(acc_test)

    results_target = f"results/{experiment_name}"
    plot_loss_accuracy(
        experiment_name,
        results_target,
        loss_train_history,
        loss_val_history,
        loss_test_history,
        acc_train_history,
        acc_val_history,
        acc_test_history
    )
    # Summary row holding the metrics recorded at the final epoch.
    last_epoch_results = {
        'Experiment Name': experiment_name,
        'epoch': epoch,
        'loss_train': loss_train_history[-1],
        'loss_test': loss_test_history[-1],
        'loss_val': loss_val_history[-1],
        'acc_train': acc_train_history[-1],
        'acc_test': acc_test_history[-1],
        'acc_val': acc_val_history[-1]
    }
    update_csv("results/overview.csv", last_epoch_results)

In [None]:
# Run every experiment listed in config.EXPERIMENTS_CONFIG, one after the
# other, in the order given there.
for experiment_config in config.EXPERIMENTS_CONFIG:
    run_experiment(experiment_config)