In [4]:
import numpy as np
import numpy.linalg as la
import pandas as pd
import geopandas as gpd
from tqdm.notebook import tqdm

import osmnx as ox
import momepy
import matplotlib.pyplot as plt
from shapely.geometry import Polygon
import alphashape
from pyproj import Proj, Geod
import ast
from shapely.ops import cascaded_union, polygonize, unary_union

data_path = '../../data/'  
polygon_road_network = gpd.read_file(data_path + 'network/QGIS_Project/referentiel-comptages-edit.shp')
paris_districts = gpd.read_file(data_path + 'districts_paris.geojson')
df_car_detectors = gpd.read_file(data_path + 'all_car_detectors.geojson')

paris_districts = gpd.read_file('../../data/districts_paris.geojson')
polygon_all_districts = paris_districts.unary_union

def get_exterior_coords(df, start_point, end_point):
    filtered_gdf = df[(df["c_ar"] >= start_point) & (df["c_ar"] <= end_point)]

    # Check if there are any polygons matching the condition
    if not filtered_gdf.empty:
        # Apply unary_union to combine the selected polygons into a single polygon
        districts_polygon = unary_union(filtered_gdf["geometry"])
    else:
        # If no polygons match the condition, union_polygon will be None
        districts_polygon = None

    return districts_polygon.exterior.coords.xy


def get_polygon_geometry(df, start_point, end_point):
    filtered_gdf = df[(df["c_ar"] >= start_point) & (df["c_ar"] <= end_point)]

    # Check if there are any polygons matching the condition
    if not filtered_gdf.empty:
        # Apply unary_union to combine the selected polygons into a single polygon
        districts_polygon = unary_union(filtered_gdf["geometry"])
    else:
        # If no polygons match the condition, union_polygon will be None
        districts_polygon = None

    return districts_polygon

def transform_highway(value):
    if isinstance(value, list):
        return value[0] if value else None
    else:
        return value
    
def filter_for_district(x_coords, y_coords, df):
    district_polygon = Polygon(zip(x_coords, y_coords))

    # Create a GeoDataFrame containing the district polygon
    district_gdf = gpd.GeoDataFrame(geometry=[district_polygon], crs=df.crs)

    # Use the GeoDataFrame's cx attribute to spatially filter cycleways_2010_2022
    return gpd.overlay(df, district_gdf, how='intersection')


# def approximate_number_of_lanes(df):
#     df_with_lanes_approx = df.copy()
#     # Convert 'lanes' column to numeric
#     # df_with_lanes_approx['lanes'] = pd.to_numeric(df_with_lanes_approx['lanes'], errors='coerce')
#     # Calculate average lanes per highway
#     average_lanes_per_highway = df_with_lanes_approx.groupby('highway')['lanes'].mean()
#     print(average_lanes_per_highway)
#     for index, row in df_with_lanes_approx.iterrows():
#         if pd.isna(row['lanes']):
#             df_with_lanes_approx.at[index, 'lanes'] = average_lanes_per_highway.get(row['highway'])
#     return df_with_lanes_approx

In [5]:
x_district_1_4, y_district_1_4  = get_exterior_coords(paris_districts, 1, 4)
x_district_5_7, y_district_5_7  = get_exterior_coords(paris_districts, 5, 7)

# district_1_4 = get_polygon_geometry(paris_districts, 1, 4)
# district_5_7 = get_polygon_geometry(paris_districts, 5, 7)

In [16]:
def is_na_list(lst):
    return lst is None or len(lst) == 0 or all(pd.isna(x) for x in lst)

def parse_and_average_lanes(lanes_str):
    if isinstance(lanes_str, list):
        if is_na_list(lanes_str):
            return np.nan
        else: 
            return sum(map(int, lanes_str)) / len(lanes_str)
    else:
        if pd.isna(lanes_str):  # Check if input is NaN
            return np.nan  # Return NaN if input is NaN
    try:
        # Attempt to parse the string as a list
        lanes_list = ast.literal_eval(lanes_str)
        if isinstance(lanes_list, list):
            # If it's a list, calculate the average of list elements
            return sum(map(int, lanes_list)) / len(lanes_list)
        else:
            # If it's a single integer, return it as is
            return int(lanes_list)
    except (SyntaxError, ValueError):
        # If parsing fails or the lanes_str is not a list, parse as single integer
        return int(lanes_str)

# def approximate_number_of_lanes(df_matched):
#     df_matched_with_lanes_approximated = df_matched.copy()
#     print("OK")
#     average_lanes_per_highway = df_matched.groupby('highway')['lanes_mapped'].mean()
    
#     for index, row in df_matched_with_lanes_approximated.iterrows():
#         if pd.isna(row['lanes_mapped']):
#             df_matched_with_lanes_approximated.at[index, 'lanes_mapped'] = average_lanes_per_highway[row['highway']]
#     return df_matched_with_lanes_approximated

def line_length_in_meters(line_string):
    # Define a UTM projection for the zone containing your coordinates
    utm_zone = 31  # Assuming you are in Paris, which falls in UTM zone 31 for example
    proj = Proj(proj='utm', zone=utm_zone, ellps='WGS84')

    # Extract coordinates from the LineString
    coordinates = list(line_string.coords)

    # Transform the coordinates to UTM projection
    utm_coordinates = [proj(lon, lat) for lon, lat in coordinates]

    # Compute the distance between consecutive points in meters
    total_length = 0
    geod = Geod(ellps='WGS84')
    for i in range(len(utm_coordinates) - 1):
        lon1, lat1 = utm_coordinates[i]
        lon2, lat2 = utm_coordinates[i + 1]
        distance_meters = geod.inv(lon1, lat1, lon2, lat2)[-1]  # Use [-1] to get distance

        # Handle case of very small distances
        if np.isnan(distance_meters):
            dx = lon2 - lon1
            dy = lat2 - lat1
            distance_meters = np.sqrt(dx**2 + dy**2)
        total_length += distance_meters

    return total_length

In [64]:
def map_highway(df):
    highway_mapped = []
    for value in df['highway']:
        if isinstance(value, str):
            highway_mapped.append(value)
        elif isinstance(value, list):
            highway_mapped.append(value[0] if len(value) > 0 else None)
        else:
            highway_mapped.append(None)
    my_df = df.copy()
    my_df['highway_mapped'] = highway_mapped
    return my_df

In [70]:
alpha_shape = alphashape.alphashape(polygon_road_network, 435)
coordinates = list(alpha_shape.exterior[0].coords)
polygon = Polygon(coordinates)

G_2024 = ox.graph_from_polygon(polygon=polygon, simplify=True, network_type="drive")
nodes, edges_2024 = momepy.nx_to_gdf(G_2024, points=True, lines=True)

edges_2024['lanes_mapped'] = edges_2024['lanes'].apply(parse_and_average_lanes)
edges_2024 = map_highway(edges_2024)

average_lanes_per_highway_2024 = edges_2024.groupby('highway_mapped')['lanes_mapped'].mean()

  nodes, edges_2024 = momepy.nx_to_gdf(G_now, points=True, lines=True)


In [83]:


average_lanes_per_highway_2024['road'] = 2
average_lanes_per_highway_2024['virtual'] = 1


In [84]:
average_lanes_per_highway_2024

highway_mapped
living_street     1.290909
primary           3.078785
primary_link      1.844156
residential       1.436658
secondary         2.497629
secondary_link    1.636364
tertiary          2.074983
tertiary_link     1.166667
trunk             3.666667
trunk_link        1.760000
unclassified      1.689189
road              2.000000
virtual           1.000000
Name: lanes_mapped, dtype: float64

In [None]:
      # lanes_per_highway = {"primary": 3.09, "secondary": 2.48, "tertiary": 2.08, "trunk": 4, "primary_link": 3.09, "secondary_link": 2.48, "tertiary_link": 2.08, "trunk_link": 4}
        # edges.plot()

In [93]:
years = [2023, 2024]
zones = [1, 2]

def get_length_in_lane_km(df):
    length_in_lane_km = 0
    for idx, edge in df.iterrows():
        length = edge['length_computed']
        lanes = edge['lanes_mapped']
        length_edge = length * lanes/1000
        length_in_lane_km += length_edge
    return length_in_lane_km

for year in years:
    for zone in zones:
        if zone == 1:
            district = get_polygon_geometry(paris_districts, 1, 4)
        else: 
            district = get_polygon_geometry(paris_districts, 5, 7)
        overpass_settings = '[out:json][timeout:90][date:"' + str(year) + '-01-01T00:00:00Z"]'
        ox.settings.overpass_settings = overpass_settings
        ox.settings.log_console = True
    
        G = ox.graph_from_polygon(polygon=district, simplify=True, network_type="drive")
        nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)
        
        if 'lanes' not in edges.columns:
            edges['lanes'] = float('nan')
        edges['lanes_mapped'] = edges['lanes'].apply(parse_and_average_lanes)
        edges = map_highway(edges)  
   
        average_lanes_per_highway = edges.groupby('highway_mapped')['lanes_mapped'].mean()
        edges = edges[edges['geometry'].notnull()]
        edges['length_computed'] = edges['geometry'].apply(lambda x: line_length_in_meters(x))

        for index, row in edges.iterrows():
            if pd.isna(row['lanes_mapped']):
                approximated_lanes = average_lanes_per_highway[row['highway_mapped']]
                if (pd.isna(approximated_lanes)):
                    approximated_lanes = average_lanes_per_highway_2024[row['highway_mapped']]
                edges.at[index, 'lanes_mapped'] = approximated_lanes
        
        # filter for higher order roads
        edges_hor = edges[
            edges["highway"].str.contains("motorway") |
            edges["highway"].str.contains("trunk") |
            edges["highway"].str.contains("primary") |
            edges["highway"].str.contains("secondary") |
            edges["highway"].str.contains("tertiary") 
        ]
    
        length_in_km = edges['length_computed'].sum()/1000    
        length_in_lane_km = get_length_in_lane_km(edges)
        
        length_hor_in_km = edges_hor['length_computed'].sum()/1000
        length_in_lane_km_hor = get_length_in_lane_km(edges_hor)

        print(" ")
        print("Year: ", year, ", zone: ", zone)
        print("Length in km: " + str(length_in_km.round(2)))
        print("Length in lane km: " + str(length_in_lane_km))
        
        print("Length of higher order roads in km: " + str(length_hor_in_km.round(2)))
        print("Length of higher order roads in lane km: " + str(length_in_lane_km_hor))

  nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)


 
Year:  2023 , zone:  1
Length in km: 84.25
Length in lane km: 170.707364619837
Length of higher order roads in km: 28.75
Length of higher order roads in lane km: 83.15877582088862


  nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)


 
Year:  2023 , zone:  2
Length in km: 157.93
Length in lane km: 311.72340011508123
Length of higher order roads in km: 72.52
Length of higher order roads in lane km: 183.69322482604213


  nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)


 
Year:  2024 , zone:  1
Length in km: 83.69
Length in lane km: 168.6696743120909
Length of higher order roads in km: 34.2
Length of higher order roads in lane km: 94.47098272578333


  nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)


 
Year:  2024 , zone:  2
Length in km: 156.92
Length in lane km: 308.67686511819153
Length of higher order roads in km: 72.33
Length of higher order roads in lane km: 183.06145866081837


In [None]:
(85.28/) * 100

2.7814279456854885

Zuwachs Zone 1:
1.92 %
Decrease Zone 2: 2.78 % 

In [None]:
asdfads

In [None]:
# perform it for years 2013 - current year. For some reason, one cannot retrieve detector data from OSM from before 2013.
year = 2024

In [None]:
# def approximate_number_of_lanes(df_matched):
#     df_matched_with_lanes_approximated = df_matched.copy()
#     df_matched['highway'] = df_matched['highway'].apply(transform_highway)
#     average_lanes_per_highway = df_matched.groupby('highway')['lanes'].mean()
#     for index, row in df_matched_with_lanes_approximated.iterrows():
#         if pd.isna(row['lanes']):
#             df_matched_with_lanes_approximated.at[index, 'lanes'] = average_lanes_per_highway[row['highway']]
#     return df_matched_with_lanes_approximated

# def approximate_number_of_lanes(df_matched):
#     df_matched_with_lanes_approximated = df_matched.copy()
#     # Convert 'lanes' column to numeric
#     df_matched_with_lanes_approximated['lanes'] = pd.to_numeric(df_matched_with_lanes_approximated['lanes'], errors='coerce')
#     # Calculate average lanes per highway
#     average_lanes_per_highway = df_matched_with_lanes_approximated.groupby('highway')['lanes'].mean()
#     for index, row in df_matched_with_lanes_approximated.iterrows():
#         if pd.isna(row['lanes']):
#             df_matched_with_lanes_approximated.at[index, 'lanes'] = average_lanes_per_highway.get(row['highway'])
#     return df_matched_with_lanes_approximated

In [None]:
# # get OSM dataframe
# # alpha_shape = alphashape.alphashape(polygon_road_network, 435)
# # coordinates = list(alpha_shape.exterior[0].coords)
# # polygon = Polygon(coordinates)
# # x_coords, y_coords = zip(*coordinates)
# years = [2013, 2024]
# zones = [1, 2]

# for year in years:
#     print(" ")
#     for zone in zones:
#         if zone == 1:
#             district = get_polygon_geometry(paris_districts, 1, 4)
#         else: 
#             district = get_polygon_geometry(paris_districts, 5, 7)
#         overpass_settings = '[out:json][timeout:90][date:"' + str(year) + '-01-01T00:00:00Z"]'
#         ox.settings.overpass_settings = overpass_settings
#         ox.settings.log_console = True
        
#         for mode in modes:
#             G = ox.graph_from_polygon(
#                 district, simplify=True, network_type=mode, retain_all=True, truncate_by_edge=True)
#             nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)
#             length_in_km = edges['length'].sum()/1000
            
#             edges_with_lanes_approximated = approximate_number_of_lanes(edges)
            
#             if year == 2013:
#                 # nan_count = df_matched_with_lanes_approximated['lanes'].isna().sum()
#                 # print("Number of NaN values in 'lanes' column:", nan_count)
                
#                 # number of lanes are missing for highways of type "primary_link", "secondary_link", "trunk_link" and "tertiary_link"
#                 number_of_lanes_primary_link = edges[edges['highway'] == 'primary']['lanes'].mean()
#                 number_of_lanes_secondary_link = edges[edges['highway'] == 'secondary']['lanes'].mean()
#                 number_of_lanes_tertiary_link = edges[edges['highway'] == 'tertiary']['lanes'].mean()
#                 number_of_lanes_trunk_link = edges[edges['highway'] == 'trunk']['lanes'].mean()
#                 print(number_of_lanes_primary_link, number_of_lanes_secondary_link, number_of_lanes_trunk_link)
                
#                 edges.loc[edges['highway'] == 'primary_link', 'lanes'] = number_of_lanes_primary_link
#                 edges.loc[edges['highway'] == 'secondary_link', 'lanes'] = number_of_lanes_secondary_link
#                 edges.loc[edges['highway'] == 'tertiary_link', 'lanes'] = number_of_lanes_tertiary_link
#                 edges.loc[edges['highway'] == 'trunk_link', 'lanes'] = number_of_lanes_trunk_link
                
#                 # nan_count = df_matched_with_lanes_approximated['lanes'].isna().sum()
#                 # print("Number of NaN values in 'lanes' column:", nan_count)
    
#             length_in_lane_km = 0
#             for edge in edges_with_lanes_approximated.iterrows():
#                 length = edge['length']
#                 lanes = edge['lanes']
#                 length_edge = length * lanes/1000
#                 length_in_lane_km += length_edge
            
#             print("Year: ", year, ", zone: ", zone, "mode: ", mode)
#             print("Length in km: " + str(length_in_km.round(2)))
#             print("Length in lane km: " + str(length_in_lane_km.round(2)))

 


  nodes, edges = momepy.nx_to_gdf(G, points=True, lines=True)


NameError: name 'approximate_number_of_lanes' is not defined

In [None]:
x_district_1_4, y_district_1_4  = get_exterior_coords(paris_districts, 1, 4)
x_district_5_7, y_district_5_7  = get_exterior_coords(paris_districts, 5, 7)

In [None]:


cycleways_district_1_4 = filter_for_district(x_district_1_4, y_district_1_4)
cycleways_district_5_7 = filter_for_district(x_district_5_7, y_district_5_7)

In [None]:
x_district_1_4

array('d', [2.369103414954773, 2.369105862160331, 2.369106875143625, 2.369114117866479, 2.36913010408185, 2.36913487804702, 2.369137996500427, 2.369130100046732, 2.369102354168097, 2.369079241331071, 2.369077956861985, 2.368872311888239, 2.368809812205495, 2.368801899186301, 2.368738580824187, 2.368670860964883, 2.368636812528627, 2.368613150709158, 2.368612358871244, 2.368600745264712, 2.368598714720008, 2.368574425795794, 2.368418755409635, 2.368417692817263, 2.368385844883154, 2.368030923056307, 2.367991386804722, 2.367311699670686, 2.367309371355025, 2.366519794267492, 2.366502596086438, 2.366145444456016, 2.366137492190343, 2.366116855142304, 2.365968023757516, 2.365941978828986, 2.365694997301973, 2.365685631295672, 2.365682292369178, 2.365650421202715, 2.365605988329807, 2.365561424693939, 2.365548215124831, 2.365485498206277, 2.365483449345826, 2.365378777673753, 2.365366647494866, 2.364433128355796, 2.362493498379017, 2.361891702135769, 2.361514791805424, 2.360799077361141, 2.