In [1]:
import datetime
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.wkt import loads

# Root folder of the project data; the trailing slash is relied upon by the
# string concatenations that build file paths from it.
data_path = '../../data/'

# Input files: the detector GeoJSON and the detector-to-OSM matching table
# (the latter is a semicolon-separated CSV).
car_detectors_file = data_path + 'all_car_detectors.geojson'
matched_detectors_file = '../network_matching/output/detectors_matched_with_bvd_peripherique_2_osm_01_2024.csv'

df_car_detectors = gpd.read_file(car_detectors_file)
matched_detectors_2023 = pd.read_csv(matched_detectors_file, sep=";")

def read_detector_data_2023():
    """Load the 2023 traffic CSV and parse its ``t_1h`` column as datetimes.

    The file location is built from the module-level ``data_path``.

    Returns:
        pandas.DataFrame: the CSV contents, with ``t_1h`` converted by
        ``pd.to_datetime``.
    """
    csv_file = data_path + '/traffic_data/traffic_data_2023.csv'
    raw_records = pd.read_csv(csv_file)
    # assign() swaps the column in place (same position) on a new frame.
    return raw_records.assign(t_1h=pd.to_datetime(raw_records['t_1h']))

def get_merged_geodataframe(matched_detectors, ldd):
    """Attach matched-detector attributes to traffic records as a GeoDataFrame.

    Args:
        matched_detectors: table with one or more rows per ``iu_ac``; only the
            first row per ``iu_ac`` is used. ``geometry_detector`` is expected
            to hold WKT strings, since it is parsed with ``shapely.wkt.loads``.
        ldd: traffic records carrying an ``iu_ac`` column.

    Returns:
        geopandas.GeoDataFrame: inner join of ``ldd`` with the selected
        detector columns on ``iu_ac``, using ``geometry_detector`` as the
        active geometry column.
    """
    detector_columns = [
        'iu_ac', 'geometry_detector', 'highway', 'oneway',
        'length_mapped_osm_street', 'score', 'length_detector_street',
        'lanes_mapped',
    ]
    # Keep a single row per detector id so the join cannot multiply records.
    unique_detectors = matched_detectors.drop_duplicates(subset='iu_ac', keep='first')
    joined = ldd.merge(unique_detectors[detector_columns], on='iu_ac', how='inner')
    # Turn the WKT text into shapely geometries before building the GeoDataFrame.
    joined['geometry_detector'] = joined['geometry_detector'].apply(loads)
    return gpd.GeoDataFrame(joined, geometry='geometry_detector')

## Abstract

The goal of this notebook is to find the optimal scaling parameter s. We try different values for s and compare the resulting mean speeds with the TomTom values for 5 am and 6 am ("free-flow speed"). In the last week, the TomTom speeds for "city centre" were 38 km/h at 5 am and 34 km/h at 6 am. One can expect that these were similar in 2023; therefore, we can calibrate s against the speeds computed from the 2023 detector data. Note that we use the detector data for all detectors, including those on the Boulevard Périphérique - one could also exclude them.


In [2]:
# Load the 2023 traffic records, then join them with the matched detectors.
ldd_2023 = read_detector_data_2023()
gdf_ldd_2023 = get_merged_geodataframe(
    matched_detectors=matched_detectors_2023,
    ldd=ldd_2023,
)

In [3]:
# plot matched_detectors
# fig, ax = plt.subplots(figsize=(10, 10))
# matched_detectors_2023.plot(ax=ax, color='red', markersize=5)
# matched_detectors_2023['geometry_detector'] = matched_detectors_2023['geometry_detector'].apply(loads)
# gdf_detectors = gpd.GeoDataFrame(matched_detectors_2023, geometry='geometry_detector')

# gdf_detectors.plot()

# plot detectors with observed data
# gdf_ldd_double = gdf_ldd_2023.copy()
# gdf_ldd_double = gdf_ldd_double.drop_duplicates(subset='iu_ac', keep='first')

In [4]:
this_ldd = gdf_ldd_2023.copy()


def _is_complete_detector_day(detector_day):
    """Return True when a (detector, day) group has 18 rows and no missing q or k."""
    # NOTE(review): 18 is presumably the number of hourly records expected per
    # day in this dataset - confirm against the data source.
    if len(detector_day) != 18:
        return False
    return detector_day["q"].notnull().all() and detector_day["k"].notnull().all()


# Keep only detector-days with a complete set of measurements.
ldd_relevant = (
    this_ldd
    .groupby(["iu_ac", "day"])
    .filter(_is_complete_detector_day)
    .reset_index(drop=True)
)

# One group per hourly timestamp; retain only the 5, 6 and 7 o'clock ones.
grouped = ldd_relevant.groupby("t_1h")
grouped_5_6_7_am = []
for hour, group in grouped:
    if hour.hour in (5, 6, 7):
        grouped_5_6_7_am.append((hour, group))

In [5]:
# Candidate values for the scaling parameter s (density = k / s).
s_values = [0.005, 0.0055, 0.006, 0.0065, 0.007]

# Length-weighted network averages per hourly timestamp. Neither of them
# depends on s, so they are computed once, vectorised, instead of being
# rebuilt row by row with iterrows() for every candidate s.
# Because the summation order differs from a running Python total, results can
# differ from earlier runs in the last floating-point digits.
hour_2_q_per_lane_km = {}
hour_2_k_unscaled = {}
for hour, group in grouped_5_6_7_am:
    segment_lengths = group['geometry_detector'].length
    length_street_segments = segment_lengths.sum()

    lengths = segment_lengths.to_numpy(dtype=float)
    flows = group['q'].to_numpy(dtype=float, na_value=np.nan)
    occupancies = group['k'].to_numpy(dtype=float, na_value=np.nan)
    lanes = group['lanes_mapped'].to_numpy(dtype=float, na_value=np.nan)

    # ndarray.sum() does not skip NaN, so a missing lane count makes the whole
    # hourly value NaN rather than being silently dropped from the average.
    q_hd = ((lengths * flows) / lanes).sum() / length_street_segments
    k_unscaled = (lengths * occupancies).sum() / length_street_segments

    hour_2_q_per_lane_km[hour] = q_hd
    hour_2_k_unscaled[hour] = k_unscaled

for s in s_values:
    print(" ")
    print("s: " +  str(s))

    # Only this step depends on s: scale the averaged k into a density.
    hour_2_k_per_lane_km = {
        hour: k_unscaled / s for hour, k_unscaled in hour_2_k_unscaled.items()
    }

    # Speeds collected per clock hour (5, 6, 7) across all days.
    speeds_by_clock_hour = {5: [], 6: [], 7: []}
    for hour, q in hour_2_q_per_lane_km.items():
        speeds = speeds_by_clock_hour.get(hour.hour)
        if speeds is None:
            # Timestamps outside 5-7 o'clock are ignored.
            continue
        v = q / hour_2_k_per_lane_km[hour]
        # NOTE(review): the factor 100 presumably converts k from percent
        # occupancy so that v comes out in km/h - confirm.
        speeds.append(v * 100)

    hours_5_am = np.array(speeds_by_clock_hour[5])
    print("5 am mean: " + str(hours_5_am.mean()))

    hours_6_am = np.array(speeds_by_clock_hour[6])
    print("6 am mean: " + str(hours_6_am.mean()))

    hours_7_am = np.array(speeds_by_clock_hour[7])
    print("7 am mean: " + str(hours_7_am.mean()))


 
s: 0.005
5 am mean: 35.01200321812179
6 am mean: 31.059402726209143
7 am mean: 26.66518901942787
 
s: 0.0055
5 am mean: 38.51320353993396
6 am mean: 34.16534299883005
7 am mean: 29.331707921370654
 
s: 0.006
5 am mean: 42.01440386174614
6 am mean: 37.27128327145097
7 am mean: 31.99822682331345
 
s: 0.0065
5 am mean: 45.515604183558324
6 am mean: 40.377223544071875
7 am mean: 34.66474572525623
 
s: 0.007
5 am mean: 49.01680450537051
6 am mean: 43.483163816692795
7 am mean: 37.33126462719902


## Conclusion

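We find that s = 0.0055 fits best: it yields mean speeds of about 38.5 km/h at 5 am and 34.2 km/h at 6 am, which are closest to the TomTom reference values of 38 km/h and 34 km/h. We therefore choose s = 0.0055 as the optimal value.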