In [47]:
import os
import glob
import gzip
import math
import random
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
from matplotlib.colors import LogNorm
import shapely.wkt as wkt
from shapely.geometry import Point, LineString, box
from shapely.ops import nearest_points
import lxml.etree as ET
import tqdm
import wandb
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset, Subset
import torch_geometric
from torch_geometric.data import Data, Batch
from torch_geometric.transforms import LineGraph
import re

import processing_io as pio

# Parameters to define
# result_path = "dataset_1pct_0_100"
basecase = '../../../../data/pop_1pm_basecase_OLD/'
case_with_policy = '../../../../data/pop_1pm_policy_in_zone_1/'

# Seed folders of each scenario are discovered with the 'output_seed_*' pattern
subdirs_pattern_bc = os.path.join(basecase, 'output_seed_*')
subdirs_pattern_zone_1 = os.path.join(case_with_policy, 'output_seed_*')

# glob gives no ordering guarantee and iterating a set of strings can differ
# between kernel sessions, so sort the de-duplicated matches to keep the folder
# order (and therefore everything derived from it) reproducible.
# Note: the sort is lexicographic, i.e. 'output_seed_10' comes before 'output_seed_2'.
subdirs_basecase = sorted(set(glob.glob(subdirs_pattern_bc)))
subdirs_zone_1 = sorted(set(glob.glob(subdirs_pattern_zone_1)))

In [48]:
# Display the base-case folders that matched the 'output_seed_*' pattern
subdirs_basecase

['../../../../data/pop_1pm_basecase_OLD/output_seed_1',
 '../../../../data/pop_1pm_basecase_OLD/output_seed_2']

## Process results

Process the simulation outputs so that they can later be used by the GNN.

In [51]:
# Function to read and convert CSV.GZ to GeoDataFrame
def read_network_data(file_path):
    """
    Load a semicolon-delimited table and return it as a GeoDataFrame.

    Parameters:
    file_path: Path of the CSV file to read

    Returns:
    GeoDataFrame or None: The table with its 'geometry' column parsed from WKT
    text, or None when file_path does not exist
    """
    # A missing file is not treated as an error; the caller gets None instead
    if not os.path.exists(file_path):
        return None

    table = pd.read_csv(file_path, delimiter=';')
    # The 'geometry' column holds WKT text; turn it into geometry objects
    parsed_geometry = table['geometry'].apply(wkt.loads)
    table['geometry'] = parsed_geometry
    return gpd.GeoDataFrame(table, geometry='geometry')
    

def extract_numbers(path):
    """
    Collect the integers that appear in the last component of a path.

    Parameters:
    path (str): A file or directory path, e.g. '.../output_seed_2'

    Returns:
    set of int: Every run of digits found in the last path component,
    converted to int (empty set when the component contains no digits)
    """
    # normpath removes trailing separators, so 'output_seed_2/' still resolves
    # to 'output_seed_2' instead of an empty name; basename honours the
    # separator of the running OS rather than assuming '/'
    name = os.path.basename(os.path.normpath(path))
    # Use regular expression to find all numbers in the string
    numbers = re.findall(r'\d+', name)
    # Convert the list of numbers to a set of integers
    return set(map(int, numbers))

def create_dic(subdir: "str | list[str]"):
    """
    Load 'output_links.csv.gz' from one or more folders into a dictionary.

    Parameters:
    subdir (str or iterable of str): A single folder path or a collection of
    folder paths

    Returns:
    dict: Maps str(extract_numbers(folder)) -- e.g. '{2}' for a folder named
    'output_seed_2' -- to the object returned by read_network_data. Folders
    for which read_network_data returns None are left out
    """
    # A bare string would otherwise be iterated character by character
    folders = [subdir] if isinstance(subdir, str) else subdir

    result_dic = {}
    for folder in folders:
        print(f'Accessing folder: {folder}')
        random_seed = extract_numbers(folder)
        output_links = os.path.join(folder, "output_links.csv.gz")
        gdf = read_network_data(output_links)
        if gdf is None:
            # Make the omission visible instead of dropping the folder silently
            print(f'Skipping folder {folder}: could not load {output_links}')
            continue
        # The key is the string form of the extracted number set, e.g. "{2}"
        result_dic[str(random_seed)] = gdf
    return result_dic

In [52]:
# Load the link outputs of every seed folder, once per scenario
random_seed_2_dataframe_basecase = create_dic(subdirs_basecase)
random_seed_2_dataframe_zone_1 = create_dic(subdirs_zone_1)

Accessing folder: ../../../../data/pop_1pm_basecase_OLD/output_seed_1
Accessing folder: ../../../../data/pop_1pm_basecase_OLD/output_seed_2


In [65]:
# create_dic keys each table by str() of the number set parsed from the folder
# name, so the run stored in 'output_seed_2' is found under "{2}"
random_seed_2_dataframe_basecase["{2}"].head(10)

Unnamed: 0,link,from_node,to_node,length,freespeed,capacity,lanes,modes,vol_car,osm:relation:route_master,...,osm:way:id,osm:way:access,osm:way:oneway,osm:way:highway,osm:relation:route,osm:way:railway,osm:way:name,storageCapacityUsedInQsim,osm:way:tunnel,geometry
0,100315,24972409,24972408,16.181257,8.333333,480.0,1.0,"bus,car,car_passenger",9,,...,4216830.0,,yes,residential,bicycle,,Carrefour de l'Odéon,,,"LINESTRING (651473.304 6861511.110, 651457.123..."
1,100316,5904976363,24983651,14.860209,8.333333,480.0,1.0,"bus,car,car_passenger,pt",10,,...,4216831.0,,,tertiary,bus,,Carrefour de l'Odéon,,,"LINESTRING (651477.818 6861578.659, 651475.677..."
2,100317,24983651,5904976363,14.860209,8.333333,960.0,2.0,"bus,car,car_passenger,pt",4,,...,4216831.0,,,tertiary,bus,,Carrefour de l'Odéon,,,"LINESTRING (651475.677 6861563.954, 651477.818..."
3,100321,664205947,24972376,22.26454,8.333333,960.0,2.0,"car,car_passenger",8,,...,4216834.0,,yes,residential,,,Boulevard Saint-Germain,,,"LINESTRING (651565.078 6861531.470, 651559.387..."
4,100324,24972376,24972375,64.853276,8.333333,480.0,1.0,"bus,car,car_passenger",9,,...,4216833.0,,yes,residential,bicycle,,Rue Dupuytren,,,"LINESTRING (651559.387 6861509.945, 651502.714..."
5,100325,324579210,4964831516,8.333333,8.333333,480.0,1.0,"car,car_passenger",5,,...,4216839.0,,yes,residential,,,Rue Racine,,,"LINESTRING (651762.040 6861341.402, 651757.328..."
6,100326,4964831516,24972333,194.794136,8.333333,480.0,1.0,"car,car_passenger",5,,...,4216839.0,,yes,residential,,,Rue Racine,,,"LINESTRING (651757.328 6861342.130, 651566.843..."
7,100329,24972382,4964831514,103.426452,5.555556,240.0,1.0,"car,car_passenger",0,,...,4216837.0,,yes,living_street,,,Rue Pierre Sarrazin,,,"LINESTRING (651679.813 6861432.168, 651768.987..."
8,100331,24972408,24972143,10.831937,8.333333,480.0,1.0,"bus,car,car_passenger,pt",20,,...,4216820.0,,yes,tertiary,"bus,bicycle",,Carrefour de l'Odéon,,,"LINESTRING (651457.123 6861511.079, 651452.045..."
9,100332,24972144,24972325,87.127269,8.333333,480.0,1.0,"car,car_passenger",6,,...,4216823.0,,yes,residential,,,Rue Crébillon,,,"LINESTRING (651425.825 6861382.374, 651452.090..."


In [68]:
# Only the tables are needed from here on; the seed keys are dropped
geodataframes_basecase = [*random_seed_2_dataframe_basecase.values()]
geodataframes_zone_1 = [*random_seed_2_dataframe_zone_1.values()]

def compute_average_or_median_geodataframe(geodataframes, column_name, is_mean: bool = True):
    """
    Aggregate one column across a list of GeoDataFrames, row by row.

    The column values of all GeoDataFrames are stacked and reduced along the
    list axis, so row i of the result combines row i of every input.
    NOTE(review): rows are matched by position, not by index or id -- this
    assumes every GeoDataFrame lists the same rows in the same order; confirm.

    Parameters:
    geodataframes (list of GeoDataFrames): List containing GeoDataFrames
    column_name (str): The column to aggregate
    is_mean (bool): If truthy use the mean, otherwise use the median

    Returns:
    GeoDataFrame: A copy of the first GeoDataFrame in which the specified
    column is replaced by the aggregated values
    """
    # Pick the reducer once instead of branching around the computation
    reduce_values = np.mean if is_mean else np.median

    # One row per input GeoDataFrame, one column per table row
    stacked = np.array([frame[column_name].values for frame in geodataframes])

    # All other columns are taken unchanged from the first GeoDataFrame
    aggregated_gdf = geodataframes[0].copy()
    aggregated_gdf[column_name] = reduce_values(stacked, axis=0)
    return aggregated_gdf

# Mean of 'vol_car' across the seed runs, computed separately per scenario
gdf_basecase_mean = compute_average_or_median_geodataframe(
    geodataframes=geodataframes_basecase,
    column_name="vol_car",
    is_mean=True,
)
gdf_zone_1_mean = compute_average_or_median_geodataframe(
    geodataframes=geodataframes_zone_1,
    column_name="vol_car",
    is_mean=True,
)

In [None]:
def compute_difference_geodataframe(gdf1, gdf2, column_name):
    """
    Compute the difference of a specified column between two GeoDataFrames.

    Parameters:
    gdf1 (GeoDataFrame): The first GeoDataFrame
    gdf2 (GeoDataFrame): The second GeoDataFrame
    column_name (str): The column name for which to compute the difference

    Returns:
    GeoDataFrame: A copy of gdf1 in which the specified column holds
    gdf1[column_name] - gdf2[column_name]

    Raises:
    ValueError: If the two GeoDataFrames differ in shape, index or geometries
    """
    # Ensure the two GeoDataFrames have the same shape
    if gdf1.shape != gdf2.shape:
        raise ValueError("GeoDataFrames must have the same shape")

    # Ensure the two GeoDataFrames have the same indices
    if not gdf1.index.equals(gdf2.index):
        raise ValueError("GeoDataFrames must have the same indices")

    # Ensure the two GeoDataFrames have the same geometries.
    # Series.equals already reduces the comparison to one bool, so it must not
    # be followed by .all() -- a plain bool has no such method.
    if not gdf1.geometry.equals(gdf2.geometry):
        raise ValueError("GeoDataFrames must have the same geometries")

    # Create a copy of the first GeoDataFrame to use as the base for the difference GeoDataFrame
    difference_gdf = gdf1.copy()

    # Compute the difference for the specified column
    difference_gdf[column_name] = gdf1[column_name] - gdf2[column_name]

    return difference_gdf

# Base case minus zone-1 policy case: a positive value means the mean 'vol_car'
# is higher in the base case than in the policy case
gdf_basecase_difference = compute_difference_geodataframe(
    gdf1=gdf_basecase_mean,
    gdf2=gdf_zone_1_mean,
    column_name='vol_car',
)

In [None]:
# Plot the base-case-minus-policy difference with the project's plotting helper.
# NOTE(review): "diff" is presumably a label or mode understood by
# pio.plot_simulation_output -- confirm against processing_io.
pio.plot_simulation_output("diff", gdf_basecase_difference)

In [None]:
# data_processed = pio.process_result_dic(result_dic)
# data_processed_single_districts = pio.process_result_dic(result_dic_single_districts)

# torch.save(data_processed, result_path + '.pt')
# torch.save(data_processed_single_districts, result_path + '_single_districts.pt')