In [1]:
import math
import random

import numpy as np
import wandb

import torch
import torch_geometric
from torch_geometric.data import Data

from gnn_architectures import MyGnn

import gnn_architectures as garch

import sys
import os

# Add the 'scripts' directory to the Python path
scripts_path = os.path.abspath(os.path.join('..'))
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

# Now you can import the gnn_io module
import gnn_io as gio

import pprint

## 1. Define model and parameters

In [2]:
wandb.login()

# Define parameters
num_epochs = 1000
project_name = 'test_target_is_difference_y_normalized'
train_ratio = 0.8
random_seed = 42

# Seed every RNG this notebook has imported so that Restart & Run All gives
# the same weight initialisation (and the same split/shuffle, if gnn_io draws
# from one of these generators — TODO confirm which RNG create_dataloader uses).
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Early-stopping tracker handed to the training loop further down.
early_stopping = gio.EarlyStopping(patience=10, verbose=True)

# Print tensors with four decimals and without scientific notation.
torch.set_printoptions(precision=4, sci_mode=False)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


## 2. Load data

In [3]:
# Read the serialized dataset: a list with one dictionary of tensors per graph.
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# PyTorch version / `weights_only` setting — only load files from a trusted source.
data_dict_list = torch.load(
    '../../data/train_data/dataset_1pm_0-1400_more_features_target_is_difference.pt'
)

# Rebuild a torch_geometric Data object from each stored dictionary.
datalist = [
    Data(
        x=record['x'],
        edge_index=record['edge_index'],
        pos=record['pos'],
        y=record['y'],
    )
    for record in data_dict_list
]

In [4]:
# Display the first reconstructed graph to check the sizes of x, edge_index, y and pos.
datalist[0]

Data(x=[31216, 6], edge_index=[2, 59135], y=[31216, 1], pos=[31216, 2])

In [5]:
# Display the target tensor `y` of the first graph.
datalist[0].y

tensor([[ 0.9259],
        [-0.1481],
        [ 2.0000],
        ...,
        [ 0.0000],
        [ 0.0000],
        [ 0.0000]])

In [6]:
# Keep only the feature columns with indices 0, 1 and 2 of every graph ...
dataset_only_relevant_dimensions = gio.cut_dimensions(
    dataset=datalist,
    indices_of_dimensions_to_keep=list(range(3)),
)

# ... and pass the reduced dataset through gnn_io's normalization step.
dataset_normalized = gio.normalize_dataset(
    dataset_only_relevant_dimensions
)

In [7]:
# Reference error on the normalized dataset, computed by gnn_io.
# NOTE(review): presumably the loss of a trivial predictor, used to put the
# validation losses below into context — confirm in gnn_io.
baseline_error = gio.compute_baseline_error_difference_loss(
    dataset_normalized
)
print(f"Baseline error: {baseline_error}")

Baseline error: 0.13565373420715332


## 3. Train the model

In [8]:
def train(model, config=None, loss_fct=None, optimizer=None, train_dl=None, valid_dl=None, device=None, early_stopping=None):
    """Train ``model`` and report the best validation loss that was observed.

    For every epoch the model is optimised on all batches of ``train_dl``,
    then evaluated with ``garch.validate_model_pos_features`` on ``valid_dl``.
    Per-batch training losses and per-epoch validation losses are logged to
    the active wandb run, which is finished before returning.

    Args:
        model: Callable network; receives one batch per call.
        config: Object exposing ``epochs`` (maximum number of epochs).
        loss_fct: Callable ``loss_fct(predicted, targets)`` returning a loss
            that supports ``backward()`` and ``item()``.
        optimizer: Optimizer exposing ``zero_grad()`` and ``step()``.
        train_dl: Iterable of training batches; each batch must provide
            ``to(device)`` and a ``y`` attribute.
        valid_dl: Validation loader forwarded to the validation helper.
        device: Device the batches are moved to.
        early_stopping: Optional callable invoked with each validation loss;
            training stops once its ``early_stop`` attribute is truthy.
            ``None`` disables early stopping.

    Returns:
        Tuple ``(best_val_loss, best_epoch)``: the lowest validation loss and
        the epoch index at which it occurred. If no epoch ran, the result is
        ``(float("inf"), -1)``.
    """
    # Imported locally because the notebook's import cell does not provide
    # `tqdm`; without this the loop below fails with NameError on a fresh kernel.
    from tqdm import tqdm

    best_val_loss = float("inf")
    best_epoch = -1

    for epoch in range(config.epochs):
        model.train()
        # Wrap the loader itself (not the enumerate object) so the progress
        # bar can pick up the loader's length when it defines one.
        for idx, data in enumerate(tqdm(train_dl)):
            # Move the whole batch: the model consumes `data`, so moving only
            # detached copies of x / y would leave its inputs on the old device.
            data = data.to(device)
            targets = data.y
            optimizer.zero_grad()

            # Forward pass
            predicted = model(data)
            train_loss = loss_fct(predicted, targets)

            # Backward pass
            train_loss.backward()
            optimizer.step()

            wandb.log({"train_loss": train_loss.item(), "epoch": epoch, "step": idx})

        val_loss = garch.validate_model_pos_features(model, valid_dl, loss_fct, device)
        print(f"epoch: {epoch}, validation loss: {val_loss}")
        wandb.log({"loss": val_loss, "epoch": epoch})

        # Remember the best epoch; the last epoch before an early stop is by
        # construction not the best one.
        if val_loss < best_val_loss:
            best_val_loss, best_epoch = val_loss, epoch

        if early_stopping is not None:
            early_stopping(val_loss)
            if early_stopping.early_stop:
                print("Early stopping triggered. Stopping training.")
                break

    print("Best validation loss: ", best_val_loss)
    wandb.summary["val_loss"] = best_val_loss
    wandb.finish()
    return best_val_loss, best_epoch

In [9]:
# Currently best architecture!

# Hyperparameters of this run
batch_size = 16
output_layer_parameter = 'gat'
hidden_size_parameter = 16
gat_layer_parameter = 0
gcn_layer_parameter = 0
lr = 0.001
early_stopping_patience = 10

wandb.login()

# Use a fresh early-stopping tracker for every run: reusing the instance
# created earlier in the notebook would carry its state over when this cell
# is executed again (NOTE(review): assumes EarlyStopping keeps its counter /
# `early_stop` flag between calls — confirm in gnn_io).
early_stopping = gio.EarlyStopping(patience=early_stopping_patience, verbose=True)

train_dl = gio.create_dataloader(dataset=dataset_normalized, is_train=True, batch_size=batch_size, train_ratio=train_ratio)
valid_dl = gio.create_dataloader(dataset=dataset_normalized, is_train=False, batch_size=batch_size, train_ratio=train_ratio)

# Record the hyperparameters with the wandb run.
wandb.init(
    project=project_name,
    config={
        "epochs": num_epochs,
        "batch_size": batch_size,
        "lr": lr,
        "early_stopping_patience": early_stopping_patience,
        "hidden_layer_size": hidden_size_parameter,
        "gat_layers": gat_layer_parameter,
        "gcn_layers": gcn_layer_parameter,
        "output_layer": output_layer_parameter,
        # "dropout": 0.15,
    }
)
config = wandb.config

print("output_layer: ", output_layer_parameter)
print("hidden_size: ", hidden_size_parameter)
print("gat_layers: ", gat_layer_parameter)
print("gcn_layers: ", gcn_layer_parameter)

# NOTE(review): in_channels=5 presumably equals the 3 kept feature columns
# plus the 2 position coordinates consumed by the point layer — confirm
# against MyGnn before changing the kept feature indices.
gnn_instance = MyGnn(
    in_channels=5,
    out_channels=1,
    hidden_size=hidden_size_parameter,
    gat_layers=gat_layer_parameter,
    gcn_layers=gcn_layer_parameter,
    output_layer=output_layer_parameter,
)
model = gnn_instance.to(device)

# Build the optimizer only after the model has been moved to the device.
loss_fct = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

best_val_loss, best_epoch = train(
    model,
    config=config,
    loss_fct=loss_fct,
    optimizer=optimizer,
    train_dl=train_dl,
    valid_dl=valid_dl,
    device=device,
    early_stopping=early_stopping,
)

Total dataset length: 1382
Training subset length: 1104
Total dataset length: 1382
Validation subset length: 272


output_layer:  gat
hidden_size:  16
gat_layers:  0
gcn_layers:  0
Model initialized
MyGnn(
  (pointLayer): PointNetConv(local_nn=Sequential(
    (0): Linear(in_features=5, out_features=16, bias=True)
    (1): ReLU()
    (2): Linear(in_features=16, out_features=16, bias=True)
  ), global_nn=Sequential(
    (0): Linear(in_features=16, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=16, bias=True)
  ))
  (output_layer): GATConv(16, 1, heads=1)
)
epoch: 0, validation loss: 0.0006235310129876085
epoch: 1, validation loss: 0.0006190928021062384
epoch: 2, validation loss: 0.0006174798301585457
epoch: 3, validation loss: 0.0006167645470294006
epoch: 4, validation loss: 0.0006161892670206726
epoch: 5, validation loss: 0.0006157086292446098


In [None]:
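# Optional: persist the trained model (disabled; uncomment to enable).
# NOTE(review): this path starts at '../data', whereas the dataset is loaded from '../../data' — confirm the intended directory before enabling.
# torch.save(model, '../data/trained_models/model_with_features_base_flow_and_highway_classification_target_is_difference.pth')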