In [1]:
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T
import pickle
import pandas as pd
import geopandas as gpd

import torch_geometric
from torch_geometric.data import Data
from torch.utils.data import DataLoader, Dataset
from torch_geometric.transforms import LineGraph

from torch_geometric.data import Batch
from torch_geometric.data import Data, Batch

from shapely.geometry import LineString
import tqdm 
import torch.nn.functional as F

def collate_fn(data_list):
    return Batch.from_data_list(data_list)

# Abstract

This is the current working version.
The steps are the following:

1. Load data
2. Pick a loss function
3. Split into train and test data
4. Training loop

## 1. Load data and create the dataset

In [2]:
with open('../results/results_pop_1pm_first_1400.pkl', 'rb') as f:
    results_dict = pickle.load(f)
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
class MyGeometricDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list
        
    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]

# Create your data objects
datalist = []
counter = 0
for key, df in results_dict.items():
    counter +=1
    if counter > 10:
        break
    if isinstance(df, pd.DataFrame):
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
        gdf.to_crs("EPSG:4326", inplace=True)
        
        nodes = []
        edges = []
        edge_car_volumes = []
        node_to_idx = {}
        capacities = {}
        edge_positions = []

        # Iterate through the rows of the GeoDataFrame
        for idx, row in gdf.iterrows():
            from_node = row['from_node']
            to_node = row['to_node']
            car_volume = row['vol_car']
            capacity = row['capacity']
            
            # Get coordinates from the LINESTRING geometry
            coords = list(row.geometry.coords)
            from_position = coords[0]
            to_position = coords[-1]
            
            # Assign unique indices to nodes
            if from_node not in node_to_idx:
                node_to_idx[from_node] = len(nodes)
                nodes.append(from_node)
                # capacities[node_to_idx[from_node]] = capacity

            if to_node not in node_to_idx:
                node_to_idx[to_node] = len(nodes)
                nodes.append(to_node)
                # capacities[node_to_idx[to_node]] = capacity
            
            # Append edge index and attributes
            edge = (node_to_idx[from_node], node_to_idx[to_node])
            if edge not in edges:
                edges.append(edge)
                edge_car_volumes.append(car_volume)  # Target values
                capacities[edge] = capacity
                
                # Compute edge position (e.g., midpoint)
                edge_position = ((from_position[0] + to_position[0]) / 2, (from_position[1] + to_position[1]) / 2)
                edge_positions.append(edge_position)

        # Convert lists to tensors
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_positions_tensor = torch.tensor(edge_positions, dtype=torch.float)
        
        # x = torch.tensor([[capacities[i]] for i in range(len(nodes))], dtype=torch.float)
        x = torch.tensor([[0] for i in range(len(nodes))], dtype=torch.float)
        
        # Create Data object
        target_values = torch.tensor(edge_car_volumes, dtype=torch.float).unsqueeze(1)
        data = Data(edge_index=edge_index, x=x, pos=edge_positions_tensor)
        
        # Transform to line graph
        linegraph_transformation = LineGraph()
        linegraph_data = linegraph_transformation(data)
        
        # Prepare the x for line graph: index and capacity
        linegraph_x = torch.zeros((linegraph_data.num_nodes, 1), dtype=torch.float)
        for i, edge in enumerate(edges):
            capacity = capacities[edge]  
            linegraph_x[i] = capacity
        
        linegraph_data.x = linegraph_x
        
        # Target tensor for car volumes
        linegraph_data.y = torch.tensor(edge_car_volumes, dtype=torch.float).unsqueeze(1)
        
        if linegraph_data.validate(raise_on_error=True):
            datalist.append(linegraph_data)
        else:
            print("Invalid line graph data")
            
dataset = MyGeometricDataset(datalist)

In [20]:
linegraph_data.pos

tensor([[ 2.3386, 48.8518],
        [ 2.3387, 48.8524],
        [ 2.3387, 48.8524],
        ...,
        [ 2.3143, 48.8912],
        [ 2.2712, 48.8380],
        [ 2.2750, 48.8370]])

In [4]:
dataset = MyGeometricDataset(datalist)

## Define the model

In [10]:
class GnnModel(nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(12345)
        self.conv1 = torch_geometric.nn.GCNConv(1, 16)
        self.conv2 = torch_geometric.nn.GCNConv(16, 1)
        
    def forward(self, x):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return x
    
# def get_data_from_dataloader(is_train, batch_size, dataset):
#     # sub_dataset = torch.utils.data.Subset(dataset, range(0,  10))
#     # sub_dataset = torch.utils.data.Subset(dataset, range(0, int(len(dataset) * 0.8)) if is_train else range(int(len(dataset) * 0.2), len(dataset)))
#     sub_dataset = torch.utils.data.Subset(dataset, range(0, 50) if is_train else range(50, 100))
#     return DataLoader(dataset=sub_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)


def get_data_from_dataloader(is_train, batch_size, dataset):
    dataset_length = len(dataset)
    print(f"Total dataset length: {dataset_length}")
    mid_point = dataset_length // 2  # Use integer division to get an integer midpoint

    # Adjust indices based on the actual length of the dataset
    if is_train:
        indices = range(0, mid_point)
    else:
        indices = range(mid_point, dataset_length)
    
    sub_dataset = torch.utils.data.Subset(dataset, indices)
    print(f"{'Training' if is_train else 'Validation'} subset length: {len(sub_dataset)}")
    
    return DataLoader(dataset=sub_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)


def validate_model(model, valid_dl, loss_func, device):
    model.eval()
    val_loss = 0
    with torch.inference_mode():
        for idx, data in enumerate(valid_dl):
            input_node_feats, targets = data.x.to(device), data.y.to(device)
            predicted = model(data)
            print(data.x.shape)
            print(data.y.shape)
            print(predicted.shape)
            print(targets.shape)
            val_loss += loss_func(predicted, targets)*targets.size(0)
    return val_loss 

## Train and test the model

In [15]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
wandb.init(
        project="gnn_3",
        config={
            "epochs": 10,
            "batch_size": 1,
            "lr": 0.001,
            "dropout": random.uniform(0.01, 0.80),
            })
config = wandb.config

# Get the model
model = GnnModel().to(device)

# Define loss and optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
loss_fct = torch.nn.MSELoss()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▅▁

0,1
train_loss,81287.53906


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011167714355461713, max=1.0…

In [16]:
train_dl = get_data_from_dataloader(dataset  = dataset, is_train=True, batch_size=config.batch_size)
valid_dl = get_data_from_dataloader(dataset  = dataset, is_train=False, batch_size=config.batch_size)
n_steps_per_epoch = math.ceil(len(train_dl.dataset) / config.batch_size)

Total dataset length: 10
Training subset length: 5
Total dataset length: 10
Validation subset length: 5


In [17]:
for data in valid_dl:
    print(data.y.shape)

torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])


In [None]:
len(valid_dl.dataset)

50

In [18]:
# Train the model
for epoch in range(config.epochs):
    model.train()
    for step, data in enumerate(train_dl):
        input_node_features, targets = data.x.to(device), data.y.to(device)
        predicted = model(data)
        train_loss = loss_fct(predicted, targets)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        wandb.log({"train_loss": train_loss.item()})
        print(f"epoch: {epoch}, step: {step}, loss: {train_loss.item()}")
        
    val_loss = validate_model(model, valid_dl, loss_fct, device)
    wandb.log({"val_loss": val_loss})
    print(f"epoch: {epoch}, val_loss: {val_loss}")
        
wandb.summary["val_loss"] = val_loss
wandb.finish()

epoch: 0, step: 0, loss: 105376.6015625
epoch: 0, step: 1, loss: 93920.28125
epoch: 0, step: 2, loss: 87478.40625
epoch: 0, step: 3, loss: 72379.4453125
epoch: 0, step: 4, loss: 65660.2578125
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
epoch: 0, val_loss: 54064980.0
epoch: 1, step: 0, loss: 53334.87109375
epoch: 1, step: 1, loss: 45111.171875
epoch: 1, step: 2, loss: 40740.68359375
epoch: 1, step: 3, loss: 33738.48828125
epoch: 1, step: 4, loss: 30149.044921875
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
torch.Size([31140, 1])
tor

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,█▇▇▆▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_loss,█▂▂▄▂▁▁▁▁▁

0,1
train_loss,246.91742
val_loss,35798748.0


## Analysing the model

In [None]:
# Evaluate the model
# model.eval()
# with torch.no_grad():
#     pred = model(data).cpu()
#     target = data.y.view(-1, 1).cpu()
#     mse = F.mse_loss(pred, target).item()
#     rmse = torch.sqrt(torch.tensor(mse)).item()
#     print(f'Mean Squared Error: {mse:.4f}')
#     print(f'Root Mean Squared Error: {rmse:.4f}')

# # Calculate target value statistics for comparison
# target_values = target.numpy()
# mean_target = target_values.mean()
# std_target = target_values.std()
# min_target = target_values.min()
# max_target = target_values.max()

# print(f'Mean of target values: {mean_target:.4f}')
# print(f'Standard deviation of target values: {std_target:.4f}')
# print(f'Minimum target value: {min_target:.4f}')
# print(f'Maximum target value: {max_target:.4f}')