In [1]:
import os
import glob
import math
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd
import torch

import os
import pandas as pd
import geopandas as gpd
import xml.etree.ElementTree as ET
import gzip
import json
import fiona
from collections import defaultdict
from torch_geometric.transforms import LineGraph
from torch_geometric.data import Data, Batch
import fiona


# Integer code per highway-type label, written group by group: every label in
# a group shares the same code. Group order fixes the dict's insertion order.
_HIGHWAY_CODE_GROUPS = (
    (0, ('trunk', 'trunk_link', 'motorway_link')),
    (1, ('primary', 'primary_link')),
    (2, ('secondary', 'secondary_link')),
    (3, ('tertiary', 'tertiary_link')),
    (4, ('residential',)),
    (5, ('living_street',)),
    (6, ('pedestrian',)),
    (7, ('service',)),
    (8, ('construction',)),
    (9, ('unclassified',)),
    # NOTE(review): this key is the literal string 'np.nan', not a float NaN;
    # confirm that is what the lookup site expects.
    (-1, ('np.nan',)),
)

highway_mapping = {
    label: code
    for code, labels in _HIGHWAY_CODE_GROUPS
    for label in labels
}

# Directory whose files are listed and loaded by the file-loading cell.
base_dir = '../../../../data/pop_1pm_simulations/idf_1pm/'

In [2]:
# Load every CSV and GeoPackage file found in base_dir into a notebook-level
# variable. The variable name is the file's base name with the "idf_1pm_"
# prefix removed, plus "_df" for CSV files or "_gdf" for GeoPackage files
# (e.g. idf_1pm_persons.csv -> persons_df). Files with any other extension
# are ignored. Loading is best-effort: a failure is printed and the loop
# moves on to the next file.
files = os.listdir(base_dir)

for file in files:
    file_path = os.path.join(base_dir, file)
    base_name, ext = os.path.splitext(file)
    if base_name.startswith("idf_1pm_"):
        base_name = base_name.replace("idf_1pm_", "")
    var_name = base_name  # Start with the cleaned base name

    if file.endswith('.csv'):
        try:
            var_name = f"{var_name}_df"
            # These CSV files are semicolon-separated.
            globals()[var_name] = pd.read_csv(file_path, sep=";")
            print(f"Loaded CSV file: {file} into variable: {var_name}")
        except Exception as e:
            print(f"Error loading CSV file {file}: {e}")

    elif file.endswith('.gpkg'):
        try:
            var_name = f"{var_name}_gdf"
            layers = fiona.listlayers(file_path)

            # Read each layer into a GeoDataFrame
            geodataframes = {layer: gpd.read_file(file_path, layer=layer, geometry = 'geometry', crs="EPSG:2154") for layer in layers}

            # Reproject each layer to EPSG:4326 and store it under var_name.
            # NOTE(review): every layer of a file is written to the same
            # variable, so for a multi-layer file only the last layer is kept.
            for layer, gdf in geodataframes.items():
                print(f"Layer: {layer}")
                gdf = gdf.to_crs(epsg=4326)

                globals()[var_name] = gdf
                print(f"Loaded GPKG file: {file} into variable: {var_name}")
        except Exception as e:
            # Report the failure against the right file type (this message
            # previously said "CSV").
            print(f"Error loading GPKG file {file}: {e}")

Loaded CSV file: idf_1pm_persons.csv into variable: persons_df
Layer: idf_1pm_commutes
Loaded GPKG file: idf_1pm_commutes.gpkg into variable: commutes_gdf
Loaded CSV file: idf_1pm_households.csv into variable: households_df
Loaded CSV file: idf_1pm_trips.csv into variable: trips_df
Loaded CSV file: idf_1pm_activities.csv into variable: activities_df
Loaded CSV file: idf_1pm_vehicle_types.csv into variable: vehicle_types_df
Layer: idf_1pm_trips
Loaded GPKG file: idf_1pm_trips.gpkg into variable: trips_gdf
Layer: idf_1pm_activities
Loaded GPKG file: idf_1pm_activities.gpkg into variable: activities_gdf
Loaded CSV file: idf_1pm_vehicles.csv into variable: vehicles_df
Layer: idf_1pm_homes
Loaded GPKG file: idf_1pm_homes.gpkg into variable: homes_gdf


# Relevant dataframes are:

activities_gdf
commutes_gdf
households_df
trips_gdf
vehicles_df

In [3]:
# Intermediate tables are read from the intermediate_results/ folder, relative
# to the notebook's working directory, using pandas' default CSV settings.

# Population table; merged with persons_df on its "id" column further down.
population_df = pd.read_csv(
    filepath_or_buffer="intermediate_results/population.csv",
)

# Transit schedule relations table.
transit_schedule_relations_df = pd.read_csv(
    filepath_or_buffer="intermediate_results/transit_schedule_relations.csv",
)

# Transit schedule stop facilities table.
transit_schedule_stop_facilities_df = pd.read_csv(
    filepath_or_buffer="intermediate_results/transit_schedule_stop_facilities.csv",
)

In [4]:
# Order both tables by their person identifier before joining them.
sorted_population_df = population_df.sort_values("id")
sorted_persons_df = persons_df.sort_values("person_id")

# Inner join (the pandas default): persons.person_id == population.id.
merged_df = sorted_persons_df.merge(
    sorted_population_df,
    left_on="person_id",
    right_on="id",
)

# Columns dropped after the merge: the "_y" duplicates, the join key "id",
# and several population-side attributes.
_columns_to_drop = [
    'employed_y',
    'hasPtSubscription',
    'householdId',
    'sex_y',
    'htsPersonId',
    'censusPersonId',
    'hasLicense',
    'id',
    'age_y',
]
removed_some_columns = merged_df.drop(columns=_columns_to_drop)

# Independent copy used as the persons table from here on.
updated_persons = removed_some_columns.copy()

In [20]:
# Preview up to the first 20 rows of the merged and trimmed persons table.
updated_persons.head(20)

Unnamed: 0,person_id,household_id,age_x,employed_x,sex_x,socioprofessional_class,has_driving_license,has_pt_subscription,census_person_id,hts_id,bikeAvailability,carAvailability,censusHouseholdId,householdIncome,htsHouseholdId,isPassenger,vehicles
0,1972,1084,23,True,male,4,True,True,749,982,none,none,410,1098.13737,463,False,"{""car"":""1972:car"",""car_passenger"":""1972:car_pa..."
1,1973,1084,23,True,female,4,True,True,750,1054,none,none,410,1098.13737,502,False,"{""car"":""1973:car"",""car_passenger"":""1973:car_pa..."
2,2973,1590,39,False,male,6,True,True,1143,5283,none,none,615,3287.39681,2380,False,"{""car"":""2973:car"",""car_passenger"":""2973:car_pa..."
3,5524,2921,32,True,female,4,False,False,2058,442,some,none,1106,2109.866062,177,False,"{""car"":""5524:car"",""car_passenger"":""5524:car_pa..."
4,5525,2921,31,True,male,5,False,False,2059,7081,some,none,1106,2109.866062,3077,False,"{""car"":""5525:car"",""car_passenger"":""5525:car_pa..."
5,7199,3784,71,True,female,4,False,False,2713,3604,none,none,1442,1556.330885,1667,False,"{""car"":""7199:car"",""car_passenger"":""7199:car_pa..."
6,14041,7514,67,False,female,7,True,True,5455,512,none,none,2940,6192.562297,216,False,"{""car"":""14041:car"",""car_passenger"":""14041:car_..."
7,14246,7606,31,True,female,4,True,True,5533,840,none,none,2975,1715.325021,380,False,"{""car"":""14246:car"",""car_passenger"":""14246:car_..."
8,16252,8634,44,True,male,3,True,False,6299,1206,none,some,3374,6272.979646,583,False,"{""car"":""16252:car"",""car_passenger"":""16252:car_..."
9,16253,8634,74,False,female,7,True,True,6300,1297,none,some,3374,6272.979646,636,False,"{""car"":""16253:car"",""car_passenger"":""16253:car_..."


In [21]:
# Distinct socioprofessional_class values, in order of first appearance.
pd.unique(updated_persons['socioprofessional_class'])

array([4, 6, 5, 7, 3, 8, 2, 1])

In [7]:
# Total number of rows in activities_gdf.
len(activities_gdf)

50834

In [13]:
# Group the activities by their 'purpose' column for the per-purpose counts.
grouped = activities_gdf.groupby(by='purpose')

In [16]:
# Print each purpose followed, on the next line, by its number of activities.
for name, group in grouped:
    print(f"Group name: {name}\n{len(group)}")

Group name: education
3243
Group name: home
28594
Group name: leisure
4792
Group name: other
4006
Group name: shop
4433
Group name: work
5766


In [8]:
# Preview the first rows of the activities GeoDataFrame (includes geometry).
activities_gdf.head()

Unnamed: 0,person_id,household_id,activity_index,preceding_trip_index,following_trip_index,purpose,start_time,end_time,is_first,is_last,geometry
0,1972,1084,0,-1,0,home,,67289.0,True,False,POINT (2.37653 48.86644)
1,1972,1084,1,0,1,leisure,69269.0,78089.0,False,False,POINT (2.32311 48.88089)
2,1972,1084,2,1,-1,home,79889.0,,False,True,POINT (2.37653 48.86644)
3,1973,1084,0,-1,0,home,,33740.0,True,False,POINT (2.37653 48.86644)
4,1973,1084,1,0,1,work,35840.0,65240.0,False,False,POINT (2.31411 48.87720)


In [9]:
# Number of activities whose purpose is 'home'.
len(activities_gdf.loc[activities_gdf['purpose'].eq('home')])

28594

In [10]:
# Preview the first rows of the activities table loaded from CSV.
activities_df.head()

Unnamed: 0,person_id,household_id,activity_index,preceding_trip_index,following_trip_index,purpose,start_time,end_time,is_first,is_last
0,1972,1084,0,-1,0,home,,67289.0,True,False
1,1972,1084,1,0,1,leisure,69269.0,78089.0,False,False
2,1972,1084,2,1,-1,home,79889.0,,False,True
3,1973,1084,0,-1,0,home,,33740.0,True,False
4,1973,1084,1,0,1,work,35840.0,65240.0,False,False


In [11]:
# Show the homes table ordered by household_id; homes_gdf itself is unchanged.
homes_gdf.sort_values('household_id')

Unnamed: 0,household_id,geometry
0,1084,POINT (2.37653 48.86644)
1,1590,POINT (2.37969 48.84590)
2,2921,POINT (2.32901 48.89036)
3,3784,POINT (2.36433 48.85736)
4,7514,POINT (2.31159 48.83649)
...,...,...
5295,5479071,POINT (2.45167 48.98739)
5296,5479851,POINT (2.35626 49.07445)
5462,5479946,POINT (2.23220 48.95621)
5297,5481534,POINT (2.05988 49.04099)


In [22]:
# Preview the first rows of the trips GeoDataFrame.
trips_gdf.head()

Unnamed: 0,person_id,trip_index,preceding_activity_index,following_activity_index,departure_time,arrival_time,preceding_purpose,following_purpose,is_first,is_last,geometry
0,1972,0,0,1,67289.0,69269.0,home,leisure,True,False,"LINESTRING (2.37653 48.86644, 2.32311 48.88089)"
1,1972,1,1,2,78089.0,79889.0,leisure,home,False,True,"LINESTRING (2.32311 48.88089, 2.37653 48.86644)"
2,1973,0,0,1,33740.0,35840.0,home,work,True,False,"LINESTRING (2.37653 48.86644, 2.31411 48.87720)"
3,1973,1,1,2,65240.0,67940.0,work,home,False,True,"LINESTRING (2.31411 48.87720, 2.37653 48.86644)"
4,2973,0,0,1,21976.0,24376.0,home,work,True,False,"LINESTRING (2.37969 48.84590, 2.54708 49.00713)"


In [23]:
# Total number of rows in trips_gdf.
len(trips_gdf)

38293

In [24]:
# Preview the first rows of the transit schedule relations table.
transit_schedule_relations_df.head()

Unnamed: 0,fromStop,toStop,transferTime
0,IDFM:35594.link:260959,IDFM:35593.link:260960,60.0
1,IDFM:24454.link:143141,IDFM:24452.link:322682,300.0
2,IDFM:24454.link:143141,IDFM:25273.link:378656,333.0
3,IDFM:24454.link:143141,IDFM:35823.link:52729,253.0
4,IDFM:24454.link:143141,IDFM:24455.link:322683,262.0


In [25]:
# Display the transit stop facilities table read from
# intermediate_results/transit_schedule_stop_facilities.csv.
# NOTE(review): this renders the whole frame rather than a .head() preview
# like the neighbouring cells; confirm the full display is intended.
transit_schedule_stop_facilities_df

Unnamed: 0,id,linkRefId,x,y,name,stopAreaId,isBlocking
0,IDFM:10001.link:347106,347106,640646.000048,6.880849e+06,Les Châtaigniers,IDFM:66540,False
1,IDFM:10002.link:347105,347105,640643.000048,6.880827e+06,Les Châtaigniers,IDFM:66540,False
2,IDFM:10003.link:485992,485992,627418.000058,6.856641e+06,Erich Von Stroheim,IDFM:63975,False
3,IDFM:10003.link:485993,485993,627418.000058,6.856641e+06,Erich Von Stroheim,IDFM:63975,False
4,IDFM:10004.link:485995,485995,627406.000058,6.856090e+06,Fritz Lang,IDFM:63913,False
...,...,...,...,...,...,...,...
42611,IDFM:monomodalStopPlace:58879.link:287964,287964,669441.500024,6.863869e+06,Chelles - Gournay,IDFM:68407,False
42612,IDFM:monomodalStopPlace:58937.link:557193,557193,669182.653025,6.860398e+06,Noisy - Champs,IDFM:73163,False
42613,IDFM:monomodalStopPlace:58937.link:704712,704712,669182.653025,6.860398e+06,Noisy - Champs,IDFM:73163,False
42614,IDFM:monomodalStopPlace:59206.link:398937,398937,648412.609741,6.853698e+06,Sceaux,IDFM:74265,False
