In [8]:
import math
import random
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd
import tqdm
import wandb

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset, Subset

import torch_geometric
from torch_geometric.data import Data, Batch
from torch_geometric.transforms import LineGraph

from shapely.geometry import LineString

# Abstract

This notebook generates the dataset; the model itself is defined in the companion notebook `gnn_for_policy_traffic_prediction_2`.

## 1. Load data and create the dataset

In [3]:
# Load the pickled results into `results_dict`; the `with` block closes the file.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
results_path = '../results/results_pop_1pm_first_1400.pkl'
with open(results_path, 'rb') as results_file:
    results_dict = pickle.load(results_file)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33menatterer[0m ([33mtum-traffic-engineering[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [23]:
# class GnnModel(nn.Module):
#     def __init__(self):
#         super().__init__()
#         torch.manual_seed(12345)
#         self.conv1 = torch_geometric.nn.GCNConv(1, 16)
#         self.conv2 = torch_geometric.nn.GCNConv(16, 1)
        
#     def forward(self, x):
#         x, edge_index = data.x, data.edge_index
#         x = self.conv1(x, edge_index)
#         x = F.relu(x)
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index)
#         return x

# def validate_model(model, valid_dl, loss_func, device):
#     model.eval()
#     val_loss = 0
#     with torch.inference_mode():
#         for idx, data in enumerate(valid_dl):
#             input_node_features, targets = data.x.to(device), data.y.to(device)
#             predicted = model(data)
#             # val_loss += loss_func(predicted, targets)*targets.size(0)
#             val_loss += loss_func(predicted, targets)
#     return val_loss 


        # input_node_features, targets = data.x.to(device), data.y.to(device)
        # predicted = model(data)
        # train_loss = loss_fct(predicted, targets)
        # optimizer.zero_grad()
        # train_loss.backward()
        # optimizer.step()
        # wandb.log({"train_loss": train_loss.item(), "epoch": epoch, "step": step})

# def create_dataloader(is_train, batch_size, dataset):
#     dataset_length = len(dataset)
#     print(f"Total dataset length: {dataset_length}")

#     # Calculate split index for training and validation
#     split_idx = int(dataset_length * train_ratio)
    
#     # Calculate the maximum number of samples that fit into complete batches for training and validation
#     train_samples = (split_idx // batch_size) * batch_size
#     valid_samples = ((dataset_length - split_idx) // batch_size) * batch_size

#     if is_train:
#         indices = range(0, train_samples)
#     else:
#         indices = range(split_idx, split_idx + valid_samples)
    
#     sub_dataset = Subset(dataset, indices)
#     print(f"{'Training' if is_train else 'Validation'} subset length: {len(sub_dataset)}")
#     return DataLoader(dataset=sub_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

class MyGeometricDataset(Dataset):
    """Map-style dataset that serves samples from an in-memory sequence.

    Args:
        data_list: Indexable, sized collection of samples. It is stored by
            reference (not copied) and exposed as ``self.data_list``.
    """

    def __init__(self, data_list):
        self.data_list = data_list

    def __len__(self):
        """Return the number of stored samples."""
        samples = self.data_list
        return len(samples)

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` of the wrapped collection."""
        samples = self.data_list
        return samples[idx]
    
def collate_fn(data_list):
    """Collate a list of graph samples into one ``Batch``.

    Args:
        data_list: The samples of one mini-batch, as handed over by a
            ``DataLoader``.

    Returns:
        The result of ``Batch.from_data_list(data_list)``.
    """
    batch = Batch.from_data_list(data_list)
    return batch

In [5]:
# Create data objects
#
# Every DataFrame in `results_dict` becomes one line-graph sample in which each
# row (one road link) is a node carrying
#   x   - the link capacity,
#   y   - the car volume on the link (the target),
#   pos - the midpoint between the link's first and last coordinate (EPSG:4326).
# Two nodes are connected when the first link ends at the node where the second
# link starts. x, y and pos are stored in the row order of the source frame.
datalist = []
counter = 0  # number of `results_dict` entries visited, DataFrame or not

# force_directed=True keeps one line-graph node per directed link even if a
# network contains every link in both directions; without it such a network is
# treated as undirected and yields only half as many nodes as there are rows.
linegraph_transformation = LineGraph(force_directed=True)

for key, df in results_dict.items():
    counter += 1
    if not isinstance(df, pd.DataFrame):
        continue
    if df.empty:
        # A frame without links cannot produce a usable graph.
        print(f"Skipping {key!r}: frame contains no links")
        continue

    gdf = gpd.GeoDataFrame(df, geometry='geometry')
    # Assuming the original CRS is EPSG:2154: declare it (overriding any CRS
    # already attached, as the plain attribute assignment did) and reproject.
    gdf = gdf.set_crs("EPSG:2154", allow_override=True)
    gdf = gdf.to_crs("EPSG:4326")

    # Create dictionaries for nodes and edges
    nodes = pd.concat([gdf['from_node'], gdf['to_node']]).unique()
    node_to_idx = {node: idx for idx, node in enumerate(nodes)}

    gdf['from_idx'] = gdf['from_node'].map(node_to_idx)
    gdf['to_idx'] = gdf['to_node'].map(node_to_idx)

    # LineGraph coalesces the edge list, which merges parallel links. The
    # line graph would then have fewer nodes than there are feature rows, so
    # such samples cannot be mapped one-to-one and are skipped.
    if gdf.duplicated(subset=['from_idx', 'to_idx']).any():
        print(f"Skipping {key!r}: several links share the same (from_node, to_node) pair")
        continue

    edges = gdf[['from_idx', 'to_idx']].values
    edge_car_volumes = gdf['vol_car'].values
    capacities = gdf['capacity'].values
    edge_positions = np.array([
        ((start[0] + end[0]) / 2, (start[1] + end[1]) / 2)
        for start, end in ((geom.coords[0], geom.coords[-1]) for geom in gdf.geometry)
    ])

    # Line-graph node i refers to the i-th edge of the *coalesced* (row-wise
    # sorted) edge list, not to the i-th row of `gdf`. Feed the links in sorted
    # order so that coalescing leaves them untouched, and remember which row
    # each sorted link came from (np.lexsort uses the last key as primary key).
    sort_order = np.lexsort((edges[:, 1], edges[:, 0]))

    # Convert lists to tensors
    edge_index = torch.tensor(edges[sort_order], dtype=torch.long).t().contiguous()
    edge_positions_tensor = torch.tensor(edge_positions, dtype=torch.float)
    target_values = torch.tensor(edge_car_volumes, dtype=torch.float).unsqueeze(1)
    # Placeholder node features; they only tell Data how many nodes exist.
    x = torch.zeros((len(nodes), 1), dtype=torch.float)

    # Create Data object and transform it to its line graph
    data = Data(edge_index=edge_index, x=x)
    linegraph_data = linegraph_transformation(data)

    # Sanity check: exactly one line-graph node per link row.
    if linegraph_data.num_nodes != len(gdf):
        print("Invalid line graph data")
        continue

    # Translate node ids from "position in the sorted link list" back to
    # "row of gdf", so the connectivity matches the row-ordered x / y / pos.
    row_of_sorted_link = torch.as_tensor(sort_order, dtype=torch.long)
    linegraph_data.edge_index = row_of_sorted_link[linegraph_data.edge_index]

    # Prepare the x for line graph: capacity
    linegraph_x = torch.tensor(capacities, dtype=torch.float).unsqueeze(1)
    linegraph_data.x = linegraph_x

    # Per-link midpoints become the node positions of the line graph
    linegraph_data.pos = edge_positions_tensor

    # Target tensor for car volumes
    linegraph_data.y = target_values

    if linegraph_data.validate(raise_on_error=True):
        datalist.append(linegraph_data)
    else:
        print("Invalid line graph data")

# dataset = MyGeometricDataset(datalist)
# Convert dataset to a list of dictionaries
data_dict_list = [
    {'x': graph.x, 'edge_index': graph.edge_index, 'pos': graph.pos, 'y': graph.y}
    for graph in datalist
]

# Save the list of dictionaries
torch.save(data_dict_list, 'dataset_1pm_0-1382.pt')