In [1]:
import math
import random
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd
import tqdm
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset, Subset

import torch_geometric
from torch_geometric.data import Data, Batch
from torch_geometric.transforms import LineGraph

from shapely.geometry import LineString

from sklearn.preprocessing import MinMaxScaler, StandardScaler

def create_dataloader(is_train, batch_size, dataset, train_ratio):
    dataset_length = len(dataset)
    print(f"Total dataset length: {dataset_length}")

    # Calculate split index for training and validation
    split_idx = int(dataset_length * train_ratio)
    
    # Calculate the maximum number of samples that fit into complete batches for training and validation
    train_samples = (split_idx // batch_size) * batch_size
    valid_samples = ((dataset_length - split_idx) // batch_size) * batch_size
    if is_train:
        indices = range(0, train_samples)
    else:
        indices = range(split_idx, split_idx + valid_samples)
    sub_dataset = Subset(dataset, indices)
    print(f"{'Training' if is_train else 'Validation'} subset length: {len(sub_dataset)}")
    return DataLoader(dataset=sub_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

class MyGeometricDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list
    def __len__(self):
        return len(self.data_list)
    def __getitem__(self, idx):
        return self.data_list[idx]
    
def collate_fn(data_list):
    return Batch.from_data_list(data_list)

def normalize_data(dataset):
    # Collect all node features
    all_node_features = []
    for data in dataset:
        all_node_features.append(data.x)

    # Stack all node features into a single tensor
    all_node_features = torch.cat(all_node_features, dim=0)
    
    # Fit the min-max scaler on the node features
    scaler = MinMaxScaler()
    scaler.fit(all_node_features)

    # Apply the scaler to each data instance and store as a new feature
    for data in dataset:
        data.normalized_x = torch.tensor(scaler.transform(data.x), dtype=torch.float)

    return dataset

def normalize_positional_features(dataset):
    # Collect all positional features
    all_pos_features = []
    for data in dataset:
        all_pos_features.append(data.pos)

    # Stack all positional features into a single tensor
    all_pos_features = torch.cat(all_pos_features, dim=0)
    
    # Fit the min-max scaler on the positional features
    scaler = MinMaxScaler()
    scaler.fit(all_pos_features)

    # Apply the scaler to each data instance and store as a new feature
    for data in dataset:
        data.normalized_pos = torch.tensor(scaler.transform(data.pos), dtype=torch.float)
    return dataset

def normalize_y_values(dataset):
    # Collect all y values
    all_y_values = []
    for data in dataset:
        all_y_values.append(data.y)

    # Stack all y values into a single tensor
    all_y_values = torch.cat(all_y_values, dim=0)

    # Fit the min-max scaler on the y values
    scaler = MinMaxScaler()
    scaler.fit(all_y_values)

    # Apply the scaler to each data instance and store as a new feature
    for data in dataset:
        data.normalized_y = torch.tensor(scaler.transform(data.y), dtype=torch.float)  # Keep the 2D shape

    return dataset

def normalize_dataset(dataset):
    # Normalize node features
    dataset = normalize_data(dataset)
    # Normalize positional features (if any)
    dataset = normalize_positional_features(dataset)
    # Normalize y values
    dataset = normalize_y_values(dataset)
    return dataset

# Abstract

This is the current working version. The steps are the following:

1. Load data
2. Load model and loss function
3. Split into train and test data
4. Training loop

In [2]:
# Define parameters
num_epochs = 500
batch_size = 20
lr = 0.001
wandb_name = 'gnn_decrease_model_for_one_batch'
train_ratio = 0.8
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

## 1. Load data and create the dataset

In [8]:
class GnnModel(nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(12345)
        self.conv1 = torch_geometric.nn.GCNConv(1, 16)
        # self.conv2 = torch_geometric.nn.GATConv(16, 16)
        self.conv3 = torch_geometric.nn.GCNConv(16, 1)
        # self.conv3 = torch_geometric.nn.GCNConv(16, 1)
        # self.gat1 = torch_geometric.nn.GATConv(16, 16)
        # self.conv4 = torch_geometric.nn.GCNConv(16, 1)
                
        # self.convWithPos = torch_geometric.nn.conv.PointNetConv(1, 16, 3)
        
    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        # x = F.relu(x)
        # x = F.dropout(x, training=self.training)
        # x = self.conv2(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv3(x, edge_index)
        # x = F.relu(x)
        # x = F.dropout(x, training=self.training)
        # x = self.conv3(x, edge_index)
        # x = F.relu(x)
        # x = F.dropout(x, training=self.training)
        # x = self.gat1(x, edge_index)
        # x = F.relu(x)
        # x = F.dropout(x, training=self.training)
        # x = self.conv4(x, edge_index)
        return x

def validate_model(model, valid_dl, loss_func, device):
    model.eval()
    val_loss = 0
    num_batches = 0
    with torch.inference_mode():
        for idx, data in enumerate(valid_dl):
            input_node_features, targets = data.normalized_x.to(device), data.normalized_y.to(device)
            predicted = model(data)
            val_loss += loss_func(predicted, targets).item()
            num_batches += 1
    return val_loss / num_batches if num_batches > 0 else 0

In [9]:
# Load the list of dictionaries
data_dict_list = torch.load('../results/dataset_1pm_0-1382.pt')

# Reconstruct the Data objects
datalist = [Data(x=d['x'], edge_index=d['edge_index'], pos=d['pos'], y=d['y']) for d in data_dict_list]

# Recreate the dataset
dataset = MyGeometricDataset(datalist)

# Apply normalization to your dataset
dataset_normalized = normalize_dataset(dataset)

## 2. Load model and loss function

In [10]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
wandb.init(
        project="check_errors",
        config={
            "epochs": num_epochs,
            "batch_size": batch_size,
            "lr": lr,
            # "dropout": 0.15,
            })
config = wandb.config
model = GnnModel().to(device)

# Define loss and optimizer
# optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

loss_fct = torch.nn.MSELoss()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01116761805555547, max=1.0)â€¦

In [11]:
model

GnnModel(
  (conv1): GCNConv(1, 16)
  (conv3): GCNConv(16, 1)
)

In [12]:
asdfadsf

NameError: name 'asdfadsf' is not defined

## 3. Split into train and test set

In [None]:
train_dl = create_dataloader(dataset=dataset_normalized, is_train=True, batch_size=config.batch_size, train_ratio=train_ratio)
valid_dl = create_dataloader(dataset=dataset_normalized, is_train=False, batch_size=config.batch_size, train_ratio=train_ratio)
n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)
print(n_steps_per_epoch)

Total dataset length: 1382
Training subset length: 1100
Total dataset length: 1382
Validation subset length: 260
55


## 4. Train the model

In [None]:
# # Train the model
# for epoch in range(config.epochs):
#     model.train()
#     # data = next(iter(train_dl))
#     # for idx in range(len(train_dl)):
        
#     for idx, data in enumerate(train_dl):
#         input_node_features, targets = data.normalized_x.to(device), data.normalized_y.to(device)
#         predicted = model(data)
#         train_loss = loss_fct(predicted, targets)
#         optimizer.zero_grad()
#         train_loss.backward()
#         optimizer.step()
#         wandb.log({"train_loss": train_loss.item(), "epoch": epoch, "step": idx})
#         print(f"epoch: {epoch}, step: {idx}, loss: {train_loss.item()}")
        
#     val_loss = validate_model(model, valid_dl, loss_fct, device)
#     wandb.log({"val_loss": val_loss})
#     print(f"epoch: {epoch}, val_loss: {val_loss}")
        
# wandb.summary["val_loss"] = val_loss
# wandb.finish()

In [None]:
# class EarlyStopping:
#     def __init__(self, tolerance=5, min_delta=0):
#         self.tolerance = tolerance
#         self.min_delta = min_delta
#         self.counter = 0
#         self.early_stop = False

#     def __call__(self, train_loss, validation_loss):
#         if (validation_loss - train_loss) > self.min_delta:
#             self.counter +=1
#             if self.counter >= self.tolerance:  
#                 self.early_stop = True
# early_stopping = EarlyStopping(tolerance=5, min_delta=10)


In [None]:
# find the average per batch

# Train the model
# mse_loss = 0
# counter = 0
for epoch in range(config.epochs):
    model.train()
    data = next(iter(train_dl))
    for idx in range(len(train_dl)):
        
    # for idx, data in enumerate(train_dl):
        input_node_features, targets = data.normalized_x.to(device), data.normalized_y.to(device)
        # y_values_normalized = np.concatenate([data.normalized_y])
        # y_mean_per_batch = np.mean(y_values_normalized)
        # n = len(data.normalized_y)
        # y_mean_per_batch = torch.tensor(y_mean_per_batch * np.ones((n, 1)))
        # error_per_batch = torch.mean((data.normalized_y - y_mean_per_batch)**2)
        # mse_loss += error_per_batch.item()
        # counter += 1
        # y_mean_per_batch_tensor = torch.tensor(y_mean_per_batch)

        predicted = model(data)
        train_loss = loss_fct(predicted, targets)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        wandb.log({"train_loss": train_loss.item(), "epoch": epoch, "step": idx})
        # print(f"epoch: {epoch}, step: {idx}, loss: {train_loss.item()}")
        
        
        
    val_loss = validate_model(model, valid_dl, loss_fct, device)
    
    # early stopping
    # early_stopping(train_loss, val_loss)
    # if early_stopping.early_stop:
    #   print("We are at epoch:", i)
    #   break
    wandb.log({"val_loss": val_loss})
    print(f"epoch: {epoch}, val_loss: {val_loss}")
        
wandb.summary["val_loss"] = val_loss
wandb.finish()

epoch: 0, val_loss: 36074.207331730766
epoch: 1, val_loss: 24183.85486778846
epoch: 2, val_loss: 78698.20673076923
epoch: 3, val_loss: 102636.74699519231
epoch: 4, val_loss: 101859.34975961539
epoch: 5, val_loss: 90070.79627403847
epoch: 6, val_loss: 76151.65204326923
epoch: 7, val_loss: 63441.45823317308
epoch: 8, val_loss: 52378.12319711538
epoch: 9, val_loss: 61294.62319711538
epoch: 10, val_loss: 45354.18028846154
epoch: 11, val_loss: 34868.485877403844
epoch: 12, val_loss: 27600.67022235577
epoch: 13, val_loss: 22172.89287860577
epoch: 14, val_loss: 18098.572415865383
epoch: 15, val_loss: 14953.845552884615
epoch: 16, val_loss: 12339.248422475961
epoch: 17, val_loss: 10288.269756610576
epoch: 18, val_loss: 8633.09990985577
epoch: 19, val_loss: 7207.927396334135
epoch: 20, val_loss: 6059.212552584135
epoch: 21, val_loss: 5042.471529447115
epoch: 22, val_loss: 4238.04052734375
epoch: 23, val_loss: 3513.580866887019
epoch: 24, val_loss: 2917.111328125
epoch: 25, val_loss: 2395.178297

[E thread_pool.cpp:130] Exception in thread pool task: mutex lock failed: Invalid argument


KeyboardInterrupt: 