In [1]:
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T
import pickle
import pandas as pd
import geopandas as gpd

import gnn_io

import torch_geometric
from torch_geometric.data import Data
from torch.utils.data import DataLoader, Dataset
from torch_geometric.transforms import LineGraph

from torch_geometric.data import Batch
from torch_geometric.data import Data, Batch

from shapely.geometry import LineString
import tqdm 
import torch.nn.functional as F

def collate_fn(data_list):
    return Batch.from_data_list(data_list)

# Abstract

This is the current working version.
The steps are the following:

1. Load data
2. Pick a loss function
3. Split into train and test data
4. Training loop

## 1. Load data and create the dataset

In [2]:
with open('../results/results_pop_1pct_toy_example.pkl', 'rb') as f:
    results_dict = pickle.load(f)

In [3]:
class MyGeometricDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        return self.data_list[idx]

# Create your data objects
datalist = []
for key, df in results_dict.items():
    if isinstance(df, pd.DataFrame):
        gdf = gpd.GeoDataFrame(df, geometry='geometry')
        gdf.crs = "EPSG:2154"  # Assuming the original CRS is EPSG:2154
        gdf.to_crs("EPSG:4326", inplace=True)
        
        nodes = []
        edges = []
        edge_car_volumes = []
        node_to_idx = {}
        capacities = {}
        edge_positions = []

        # Iterate through the rows of the GeoDataFrame
        for idx, row in gdf.iterrows():
            from_node = row['from_node']
            to_node = row['to_node']
            car_volume = row['vol_car']
            capacity = row['capacity']
            
            # Get coordinates from the LINESTRING geometry
            coords = list(row.geometry.coords)
            from_position = coords[0]
            to_position = coords[-1]
            
            # Assign unique indices to nodes
            if from_node not in node_to_idx:
                node_to_idx[from_node] = len(nodes)
                nodes.append(from_node)
                capacities[node_to_idx[from_node]] = capacity

            if to_node not in node_to_idx:
                node_to_idx[to_node] = len(nodes)
                nodes.append(to_node)
                capacities[node_to_idx[to_node]] = capacity
            
            # Append edge index and attributes
            edge = (node_to_idx[from_node], node_to_idx[to_node])
            if edge not in edges:
                edges.append(edge)
                edge_car_volumes.append(car_volume)  # Target values
                
                # Compute edge position (e.g., midpoint)
                edge_position = ((from_position[0] + to_position[0]) / 2, (from_position[1] + to_position[1]) / 2)
                edge_positions.append(edge_position)

        # Convert lists to tensors
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        edge_positions_tensor = torch.tensor(edge_positions, dtype=torch.float)
        
        x = torch.tensor([[capacities[i]] for i in range(len(nodes))], dtype=torch.float)
        
        # Create Data object
        data = Data(edge_index=edge_index, x=x)
        
        # Transform to line graph
        linegraph_transformation = LineGraph()
        linegraph_data = linegraph_transformation(data)
        
        # Prepare the x for line graph: index and capacity
        linegraph_x = torch.zeros((linegraph_data.num_nodes, 2), dtype=torch.float)
        
        for i, (from_idx, to_idx) in enumerate(edges):
            capacity = capacities[from_idx]  # Assuming capacity is the same for from and to node
            linegraph_x[i, 0] = i  # Index
            linegraph_x[i, 1] = capacity
        
        linegraph_data.x = linegraph_x
        
        # Target tensor for car volumes
        linegraph_data.y = torch.tensor(edge_car_volumes, dtype=torch.float).unsqueeze(1)
        
        if linegraph_data.validate(raise_on_error=True):
            datalist.append(linegraph_data)
        else:
            print("Invalid line graph data")

In [4]:
# Instantiate the dataset
dataset = MyGeometricDataset(datalist)

# Usage with DataLoader
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)

# Example of iterating through the DataLoader
for data in dataloader:
    print(data)
    print(data.x[0:10])
    print(data.y[0:10])
    print(data.edge_index[0:10])
    break

DataBatch(edge_index=[2, 59851], num_nodes=31559, x=[31559, 2], y=[31559, 1], batch=[31559], ptr=[2])
tensor([[  0., 480.],
        [  1., 480.],
        [  2., 480.],
        [  3., 960.],
        [  4., 960.],
        [  5., 480.],
        [  6., 480.],
        [  7., 240.],
        [  8., 480.],
        [  9., 480.]])
tensor([[ 55.],
        [ 54.],
        [ 22.],
        [ 60.],
        [ 68.],
        [ 61.],
        [ 61.],
        [  2.],
        [135.],
        [ 42.]])
tensor([[    0,     1,     1,  ..., 31557, 31558, 31558],
        [    2, 13470, 13471,  ..., 31555, 31557, 31558]])


## Define the model

In [5]:
class GnnModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch_geometric.nn.GCNConv(2, 16)
        self.conv2 = torch_geometric.nn.GCNConv(16, 1)
        # self.layers = nn.Sequential(
        # nn.Linear(3, 64),
        # nn.ReLU(),
        # nn.Linear(64, 32),
        # nn.ReLU(),
        # nn.Linear(32, 1)
        # )
        
    def forward(self, x):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

## Train and test the model

In [6]:
# Initalize the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = GnnModel().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Train the model
model.train()
for epoch in range(10):
    for data in dataloader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = F.mse_loss(out, data.y.view(-1, 1))
        loss.backward()
        optimizer.step()
        
# Evaluate the model
model.eval()
pred = model(data)
mse = F.mse_loss(pred, data.y.view(-1, 1)).item()
print(f'Mean Squared Error: {mse:.4f}')

Mean Squared Error: 20835.3457


## Analysing the model

In [7]:
# Evaluate the model
model.eval()
with torch.no_grad():
    pred = model(data).cpu()
    target = data.y.view(-1, 1).cpu()
    mse = F.mse_loss(pred, target).item()
    rmse = torch.sqrt(torch.tensor(mse)).item()
    print(f'Mean Squared Error: {mse:.4f}')
    print(f'Root Mean Squared Error: {rmse:.4f}')

# Calculate target value statistics for comparison
target_values = target.numpy()
mean_target = target_values.mean()
std_target = target_values.std()
min_target = target_values.min()
max_target = target_values.max()

print(f'Mean of target values: {mean_target:.4f}')
print(f'Standard deviation of target values: {std_target:.4f}')
print(f'Minimum target value: {min_target:.4f}')
print(f'Maximum target value: {max_target:.4f}')

Mean Squared Error: 20835.3457
Root Mean Squared Error: 144.3445
Mean of target values: 51.4052
Standard deviation of target values: 134.8809
Minimum target value: 0.0000
Maximum target value: 1593.0000


In [8]:
mse/len(data.y)

0.6602029754784688