In [3]:
import datetime
import os
import pickle

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from sklearn.utils.random import sample_without_replacement
import geopandas as gpd
import momepy
from shapely.geometry import Polygon
import alphashape
import osmnx as ox
from shapely.wkt import loads

# Base directory for all input data; the reader functions below append
# sub-paths to this prefix.
data_path = '../../data/' 
# District polygons; merge_districts() selects rows via the 'c_ar' column.
paris_districts = gpd.read_file(data_path + 'districts_paris.geojson')
# Detector geometries; de-duplicated on 'iu_ac' in process_car_detectors().
df_car_detectors = gpd.read_file(data_path + 'all_car_detectors.geojson')
# Detector-to-OSM matching tables (semicolon-separated CSV), one per period.
matched_detectors_2013 = pd.read_csv('../network_matching/output/detectors_matched_2_osm_01_2013.csv', sep=";")
# NOTE(review): the variable is named "_2023" but the file name says
# "_01_2024" - confirm this is the intended matching file.
matched_detectors_2023 = pd.read_csv('../network_matching/output/detectors_matched_2_osm_01_2024.csv', sep=";")
# Road network shapefile; its alpha shape (alpha = 435) is used to build the
# study-area polygon below.
qgis_road_network = gpd.read_file(data_path + 'network/QGIS_Project/referentiel-comptages-edit.shp')
alpha_shape = alphashape.alphashape(qgis_road_network, 435)
# Take the coordinates of the first exterior ring of the alpha shape.
# NOTE(review): `.exterior[0]` presumably indexes a GeoSeries returned for
# GeoDataFrame input - verify against the installed alphashape version.
coordinates = list(alpha_shape.exterior[0].coords)
polygon = Polygon(coordinates)

from resampled_mfd import ResampledMFD

# NOTE(review): not referenced in the visible cells (pickles are written
# under 'output/'); possibly used further down the notebook.
resultpath = 'results/'

def merge_districts(districts_to_merge:list):
    """Dissolve the selected Paris districts into a single geometry.

    Parameters
    ----------
    districts_to_merge : list
        Values matched against the ``c_ar`` column of the module-level
        ``paris_districts`` frame.

    Returns
    -------
    geopandas.GeoDataFrame
        One-row frame holding the union of the selected district geometries,
        carrying the CRS of ``paris_districts``.
    """
    is_selected = paris_districts['c_ar'].isin(districts_to_merge)
    dissolved_geometry = paris_districts[is_selected].unary_union
    return gpd.GeoDataFrame(geometry=[dissolved_geometry], crs=paris_districts.crs)

def read_detector_data_2010():
    """Load the 2010-2012 detector readings and keep only the rows from 2010.

    Reads ``traffic_data/traffic_data_2010_2012.csv`` below ``data_path``,
    converts the ``t_1h`` column to datetimes and filters on its year.

    Returns
    -------
    pandas.DataFrame
        The rows whose ``t_1h`` timestamp falls in the year 2010.
    """
    readings = pd.read_csv(data_path + '/traffic_data/traffic_data_2010_2012.csv')
    readings['t_1h'] = pd.to_datetime(readings['t_1h'])
    recorded_in_2010 = readings['t_1h'].dt.year == 2010
    return readings[recorded_in_2010]

def read_detector_data_2023():
    """Load the 2023 detector readings with ``t_1h`` parsed as datetimes.

    Reads ``traffic_data/traffic_data_2023.csv`` below ``data_path``.

    Returns
    -------
    pandas.DataFrame
        All rows of the file; the ``t_1h`` column is converted with
        ``pd.to_datetime``.
    """
    csv_file = data_path + '/traffic_data/traffic_data_2023.csv'
    readings = pd.read_csv(csv_file)
    readings['t_1h'] = pd.to_datetime(readings['t_1h'])
    return readings

# class ResampledMFD():
#     def __init__(self, ldd, p_sample: float, n_combinations: int):
#         self.ldd = ldd
#         self.p_sample = p_sample
#         self.n_combinations = n_combinations

#     def compute_resampled_mfd(self):
#         self.resampled_mfd = ResampledMFD.resample_mfd(
#             self.ldd, self.p_sample, self.n_combinations)
#         resampled_mfd_envelope, capacity, critical_occupancy = ResampledMFD.get_resampled_mfd_envelope(
#             self.resampled_mfd)
#         self.resampled_mfd_envelope = resampled_mfd_envelope
#         self.capacity = capacity
#         self.critical_occupancy = critical_occupancy
#         return

#     def print_resampled_mfd(self):
#         print(self.capacity, self.critical_occupancy)

#     def resample_mfd(ldd, p_sample, n_combinations):
#         n_population = ldd.iu_ac.nunique()
#         n_samples = int(n_population * p_sample)
#         population = ldd.iu_ac.unique().tolist()
#         population_subsets = []
#         seen_subsets = set()
#         while len(population_subsets) < n_combinations:
#             subsets_indices = tuple(
#                 sorted(sample_without_replacement(n_population, n_samples)))
#             if subsets_indices not in seen_subsets:
#                 subset = [population[n] for n in subsets_indices]
#                 population_subsets.append(subset)
#                 seen_subsets.add(subsets_indices)
#             else:
#                 continue

#         subsets_mfds = []
#         for idx, subset in enumerate(population_subsets):
#             print(f"Processing subset {idx+1}/{len(population_subsets)}")
#             subset_ldd = ldd.loc[ldd.iu_ac.isin(subset)]
#             mfd = []
#             for tsp, group in subset_ldd.groupby('t_1h'):
#                 length_street_segments = group['geometry_detector'].length.sum()
#                 q_per_lane_km_total = 0
#                 k_per_lane_km_total = 0
#                 for idx, row in group.iterrows():
#                     q = row['q']
#                     density = row['k']/0.0055/100
#                     length = row['geometry_detector'].length
#                     lanes = row['lanes_mapped']
#                     q_per_lane_km = (length * q) / lanes
#                     k_per_lane_km = length * density 
#                     q_per_lane_km_total += q_per_lane_km
#                     k_per_lane_km_total += k_per_lane_km
                
#                 flow = q_per_lane_km_total / length_street_segments
#                 density = k_per_lane_km_total / length_street_segments
#                 mfd.append((tsp, flow, density))
#             mfd = pd.DataFrame(
#                 mfd, columns=['tsp', 'flow', 'density'])
#             subsets_mfds.append(mfd)

#         resampled_mfd = pd.concat(subsets_mfds)
#         return resampled_mfd

#     def get_resampled_mfd_envelope(resampled_mfd):
#         # choose the number of bins that best fits occupancy values
#         num_bins = 10  # Adjust this number according to your preference
#         resampled_mfd['density_bin'] = pd.cut(resampled_mfd['density'], bins=num_bins)
        
#         # resampled_mfd['density_bin'] = pd.cut(resampled_mfd['density'],
#         #                                         bins=int(resampled_mfd['density'].max()))
#         # taking the median of top M flow values per occupancy bin
#         resampled_mfd_envelope = []
#         for bin, temp in resampled_mfd.groupby('density_bin', observed=True):
#             # the default value here is "50"
#             upper_flow = temp.nlargest(50, 'flow', 'all').flow.median()
#             density = bin.mid
#             resampled_mfd_envelope.append((upper_flow, density))
#         resampled_mfd_envelope = pd.DataFrame(
#             resampled_mfd_envelope, columns=['flow', 'density'])

#         # calculate the 95th/ 97.5th percentile of flow as the capacity
#         capacity = np.percentile(
#             resampled_mfd_envelope.flow, 97.5, method='nearest')

#         rounded_capacity = round(capacity, 2)

#         matching_rows = resampled_mfd_envelope.loc[round(
#             resampled_mfd_envelope.flow, 2) == rounded_capacity]
#         if not matching_rows.empty:
#             critical_density = matching_rows['density'].iloc[0]
#         else:
#             # Handle the case where no rows match the condition
#             # You might want to set a default value or raise an exception
#             critical_density = None  # or any other suitable value

#         return resampled_mfd_envelope, capacity, critical_density


def get_ldd_for_district(district_list: list, gdf_ldd: gpd.GeoDataFrame, hours_per_day: int = 18):
    """Return the detector readings that lie within the given districts.

    The districts are merged into one geometry, the readings are spatially
    joined against it, and only complete detector-days are kept.

    Parameters
    ----------
    district_list : list
        District identifiers forwarded to ``merge_districts``.
    gdf_ldd : geopandas.GeoDataFrame
        Detector readings; must provide the columns ``iu_ac``, ``day``,
        ``q`` and ``k``.
    hours_per_day : int, default 18
        Number of rows a ``(iu_ac, day)`` group must have to be kept.

    Returns
    -------
    geopandas.GeoDataFrame
        Readings within the merged districts, restricted to
        ``(iu_ac, day)`` groups that have exactly ``hours_per_day`` rows and
        no missing ``q`` or ``k`` values, with a fresh 0..n-1 index.

    Notes
    -----
    NOTE(review): the recorded notebook output shows a CRS-mismatch warning
    for this join (left CRS ``None``, right CRS ``EPSG:4326``); the CRS of
    ``gdf_ldd`` should be set by whoever builds it.
    """
    districts = merge_districts(district_list)
    # `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
    ldd_within_districts = gpd.sjoin(
        gdf_ldd, districts, how="inner", predicate="within")
    ldd_within_districts = ldd_within_districts.drop(columns=['index_right'])
    # Keep only detector-days that are complete and free of missing values.
    ldd_within_districts = ldd_within_districts.groupby(["iu_ac", "day"]).filter(
        lambda x: len(x) == hours_per_day and x["q"].notnull().all() and x["k"].notnull().all())
    return ldd_within_districts.reset_index(drop=True)

def get_road_network_graph(polygon):
    """Download the drivable OSM network inside ``polygon`` as GeoDataFrames.

    Side effect: switches on osmnx console logging via
    ``ox.settings.log_console``.

    Parameters
    ----------
    polygon
        Boundary geometry passed to ``ox.graph_from_polygon``.

    Returns
    -------
    tuple
        ``(nodes, edges)`` as produced by ``momepy.nx_to_gdf``; ``edges``
        gains an ``index`` column numbered from 1 to ``len(edges)``.
    """
    ox.settings.log_console = True
    drive_graph = ox.graph_from_polygon(
        polygon, simplify=True, network_type="drive")
    node_gdf, edge_gdf = momepy.nx_to_gdf(drive_graph, points=True, lines=True)
    # 1-based running number for every edge.
    edge_count = len(edge_gdf)
    edge_gdf['index'] = range(1, edge_count + 1)
    return node_gdf, edge_gdf

def process_car_detectors(polygon):
    """Return the unique car detectors that lie within ``polygon``.

    Parameters
    ----------
    polygon
        Boundary geometry; it is wrapped in a one-row GeoDataFrame that is
        assigned the CRS of the detector frame.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows of the module-level ``df_car_detectors`` (first occurrence per
        ``iu_ac``) that are within the boundary, as returned by
        ``gpd.sjoin``.
    """
    df_car_detectors_without_multiples = df_car_detectors.drop_duplicates(
        subset='iu_ac', keep='first')
    boundary_gdf = gpd.GeoDataFrame(
        geometry=[polygon], crs=df_car_detectors_without_multiples.crs)
    # `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
    car_detectors_within_boundary = gpd.sjoin(
        df_car_detectors_without_multiples, boundary_gdf, predicate='within')
    return car_detectors_within_boundary

def get_merged_geodataframe(matched_detectors, ldd, crs=None):
    """Attach matched-detector attributes and geometry to detector readings.

    Parameters
    ----------
    matched_detectors : pandas.DataFrame
        Detector-to-OSM matching table; must contain ``iu_ac``,
        ``geometry_detector`` (WKT strings) and the attribute columns listed
        in ``detector_columns`` below. Only the first row per ``iu_ac`` is
        used.
    ldd : pandas.DataFrame
        Detector readings with an ``iu_ac`` column.
    crs : optional
        Coordinate reference system to assign to the result. With the
        default ``None`` the frame has no CRS, exactly as before; the
        recorded notebook output shows that this triggers a CRS-mismatch
        warning in later spatial joins.
        NOTE(review): the correct CRS of the WKT geometries is not visible
        here - confirm it before passing a value.

    Returns
    -------
    geopandas.GeoDataFrame
        Inner join of ``ldd`` and the de-duplicated matching table on
        ``iu_ac``, with ``geometry_detector`` parsed from WKT and set as the
        active geometry column.
    """
    detector_columns = [
        'iu_ac', 'geometry_detector', 'highway', 'oneway',
        'length_mapped_osm_street', 'score', 'length_detector_street',
        'lanes_mapped',
    ]
    matched_detectors_without_dupl = matched_detectors.drop_duplicates(
        subset='iu_ac', keep='first')
    merged_ldd = pd.merge(
        ldd, matched_detectors_without_dupl[detector_columns],
        on='iu_ac', how='inner')
    # WKT text -> shapely geometry objects.
    merged_ldd['geometry_detector'] = merged_ldd['geometry_detector'].apply(loads)
    return gpd.GeoDataFrame(merged_ldd, geometry='geometry_detector', crs=crs)

def get_ldd_for_district_this(district_list: list, gdf_ldd: gpd.GeoDataFrame, hours_per_day: int = 18):
    """Return the complete detector-days that lie within the given districts.

    Parameters
    ----------
    district_list : list
        District identifiers forwarded to ``merge_districts``.
    gdf_ldd : geopandas.GeoDataFrame
        Detector readings; must provide the columns ``iu_ac``, ``day``,
        ``q`` and ``k``.
    hours_per_day : int, default 18
        Number of rows a ``(iu_ac, day)`` group must have to be kept. Set
        this to the number of hours of the day that are considered (e.g. 4
        or 18) instead of editing the function body.

    Returns
    -------
    geopandas.GeoDataFrame
        Readings within the merged districts, restricted to
        ``(iu_ac, day)`` groups that have exactly ``hours_per_day`` rows and
        no missing ``q`` or ``k`` values, with a fresh 0..n-1 index.

    Notes
    -----
    NOTE(review): near-duplicate of ``get_ldd_for_district``; consider
    consolidating the two.
    """
    districts = merge_districts(district_list)
    # `predicate` replaces the deprecated `op` keyword of gpd.sjoin.
    ldd_within_districts = gpd.sjoin(
        gdf_ldd, districts, how="inner", predicate="within")
    ldd_within_districts = ldd_within_districts.drop(columns=['index_right'])
    # Keep only detector-days that are complete and free of missing values.
    ldd_within_districts = ldd_within_districts.groupby(["iu_ac", "day"]).filter(
        lambda x: len(x) == hours_per_day and x["q"].notnull().all() and x["k"].notnull().all())
    return ldd_within_districts.reset_index(drop=True)

In [2]:
# Build one GeoDataFrame per study year by joining the detector readings of
# that year to the corresponding detector-to-OSM matching table.
# NOTE(review): the 2010 readings are paired with the table named
# `matched_detectors_2013` - confirm this pairing is intended.
gdf_ldd_2010 = get_merged_geodataframe(matched_detectors_2013, read_detector_data_2010())
gdf_ldd_2023 = get_merged_geodataframe(matched_detectors_2023, read_detector_data_2023())

## Create MFDs

First, we create the resampled MFDs.

In [4]:
# District groups to analyse; each inner list is merged into one zone.
districts_to_test = [[5, 6, 7]]

# Sampling fractions passed as the second argument of ResampledMFD.resample_mfd.
ps_to_test = [0.7]

# Third argument of ResampledMFD.resample_mfd; it is also encoded in the
# output file names ("_c_<N_COMBINATIONS>").
# NOTE(review): per the commented-out reference implementation in this
# notebook this is `n_combinations`; confirm against the resampled_mfd module.
N_COMBINATIONS = 100

# Directory the pickled results are written to. Create it up front so a long
# resampling run cannot fail at the very end because the folder is missing.
OUTPUT_DIR = 'output'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# NOTE(review): `counter` is not used in this cell; kept in case a later
# cell reads it.
counter = 0
for idx, district in enumerate(districts_to_test):
    for p in ps_to_test:
        print(idx, district)
        print("p: ", p)
        ldd_district_2010 = get_ldd_for_district_this(district, gdf_ldd_2010)
        ldd_district_2023 = get_ldd_for_district_this(district, gdf_ldd_2023)
        # Strip the timezone from the 2023 timestamps.
        ldd_district_2023['t_1h'] = ldd_district_2023['t_1h'].dt.tz_localize(None)

        resampled_district_2010 = ResampledMFD.resample_mfd(ldd_district_2010, p, N_COMBINATIONS)
        resampled_district_2023 = ResampledMFD.resample_mfd(ldd_district_2023, p, N_COMBINATIONS)

        # Zone label used in the file name: districts 1-4 form zone "1",
        # every other district group is labelled zone "2".
        zone = "1" if district == [1, 2, 3, 4] else "2"

        # Persist one pickle per year; the file names are identical to the
        # previous hand-concatenated ones.
        for year, resampled in (("2010", resampled_district_2010),
                                ("2023", resampled_district_2023)):
            out_file = f"{OUTPUT_DIR}/resampled_mfd_zone_{zone}_{year}_p_{p}_c_{N_COMBINATIONS}.pkl"
            with open(out_file, 'wb') as f:
                pickle.dump(resampled, f)

0 [5, 6, 7]
p:  0.7


  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  ldd_within_districts = gpd.sjoin(
  exec(code_obj, self.user_global_ns, self.user_ns)
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  ldd_within_districts = gpd.sjoin(


Processing subset 1/100
Processing subset 2/100
Processing subset 3/100
Processing subset 4/100
Processing subset 5/100
Processing subset 6/100
Processing subset 7/100
Processing subset 8/100


KeyboardInterrupt: 