In [1]:
import os
import glob
import gzip
import pickle

import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm
from matplotlib.colors import LogNorm

import shapely.wkt as wkt
from shapely.geometry import Point, LineString, box
from shapely.ops import nearest_points
import lxml.etree as ET
import network_io as nio
from itertools import combinations
import random

In [2]:
file_path = '../../../data/pop_1pm/paris_1pm_network.xml.gz'
districts = gpd.read_file("../../../data/visualisation/districts_paris.geojson")

base_output_links_no_policies = '../../../data/pop_1pm/output_1pm/'
base_network_no_policies = nio.read_network_data(base_output_links_no_policies)
base_network_no_policies.rename(columns={'link': 'id'}, inplace=True)

# Parse nodes and edges
nodes_dict = nio.parse_nodes(file_path)
df_edges = nio.parse_edges(file_path, nodes_dict)
gdf_edges = gpd.GeoDataFrame(df_edges, geometry='geometry', crs='EPSG:2154')
gdf_edges_4326 = gdf_edges.to_crs(epsg=4326)

# Spatial join to find districts overlapping with each edge
gdf_edges_with_districts = gpd.sjoin(gdf_edges_4326, districts, how='left', op='intersects')

# Group by edge and aggregate the district names
gdf_edges_with_districts = gdf_edges_with_districts.groupby('id').agg({
    'from': 'first',
    'to': 'first',
    'length': 'first',
    'freespeed': 'first',
    'capacity': 'first',
    'permlanes': 'first',
    'oneway': 'first',
    'modes': 'first',
    'geometry': 'first',
    'c_ar': lambda x: list(x.dropna())
}).reset_index()

# Rename the aggregated column to 'district'
gdf_edges_with_districts.rename(columns={'c_ar': 'district'}, inplace=True)

# Convert freespeed and capacity to numeric values
gdf_edges_with_districts['freespeed'] = pd.to_numeric(gdf_edges_with_districts['freespeed'], errors='coerce')
gdf_edges_with_districts['capacity'] = pd.to_numeric(gdf_edges_with_districts['capacity'], errors='coerce')

gdf_edges_with_highway = gdf_edges_with_districts.merge(base_network_no_policies[['id', 'osm:way:highway']], on='id', how='left')
gdf_edges_with_highway.head()

  if await self.run_code(code, result, async_=asy):
  if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all():


Unnamed: 0,id,from,to,length,freespeed,capacity,permlanes,oneway,modes,geometry,district,osm:way:highway
0,100315,24972409,24972408,16.18125678991161,8.333333,480.0,1.0,1,"bus,car,car_passenger","LINESTRING (2.33869 48.85181, 2.33847 48.85181)",[6.0],residential
1,100316,5904976363,24983651,14.860209130036054,8.333333,480.0,1.0,1,"bus,car,car_passenger,pt","LINESTRING (2.33874 48.85242, 2.33872 48.85229)",[6.0],tertiary
2,100317,24983651,5904976363,14.860209130036054,8.333333,960.0,2.0,1,"bus,car,car_passenger,pt","LINESTRING (2.33872 48.85229, 2.33874 48.85242)",[6.0],tertiary
3,100321,664205947,24972376,22.264539971200318,8.333333,960.0,2.0,1,"car,car_passenger","LINESTRING (2.33994 48.85200, 2.33986 48.85181)",[6.0],residential
4,100324,24972376,24972375,64.85327605625626,8.333333,480.0,1.0,1,"bus,car,car_passenger","LINESTRING (2.33986 48.85181, 2.33909 48.85152)",[6.0],residential


In [3]:
neighbouring_districts = {1: [2, 3, 4, 6, 7, 8],
                          2: [1, 3, 4, 9, 10],
                          3: [1, 2, 10, 11, 4],
                          4: [1, 3, 11, 12, 5],
                          5: [4, 12, 13, 14, 6],
                          6: [1, 5, 14, 15, 7],
                          7: [8, 1, 6,15,16],
                          8: [17, 9, 1, 7, 16],
                          9: [18, 10, 2, 8],
                          10: [3,2,9,18,19,11],
                          11: [20, 12, 4, 3, 10],
                          12:[11, 20, 13,4],
                          13: [5, 12, 14],
                          14: [5, 13, 15, 6],
                          15: [6, 14, 16, 7],
                          16: [7, 15, 17, 8],
                          17: [8, 16, 18],
                          18: [9, 17, 19, 10],
                          19: [18, 10, 20],
                          20: [11, 19, 12]}            

In [4]:
districts = list(neighbouring_districts.keys())
all_connected_subsets = []

for r in range(1, len(districts) + 1):
    for subset in combinations(districts, r):
        if nio.is_connected(set(subset), neighbouring_districts):
            all_connected_subsets.append(subset)

# Anzahl der verbundenen Teilmengen
num_connected_subsets = len(all_connected_subsets)

In [5]:
higher_order_roads = ['tertiary', 'secondary', 'primary', 'secondary_link', 'primary_link', 'tertiary_link']
filtered_gdf = gdf_edges_with_highway[gdf_edges_with_highway['osm:way:highway'].isin(higher_order_roads)]
filtered_gdf = gpd.GeoDataFrame(filtered_gdf, geometry='geometry', crs='EPSG:4326')
gdf_edges_with_highway = gpd.GeoDataFrame(gdf_edges_with_highway, geometry='geometry', crs='EPSG:4326')

In [6]:
# # Create a larger plot with thinner lines
# fig, ax = plt.subplots(figsize=(15, 15))
# gdf_edges_with_highway.plot(ax=ax, linewidth=0.5, color='grey', label = "Network")
# filtered_gdf.plot(ax=ax, linewidth=0.5, color = "blue", label = "Higher order roads")

# # Customize the plot (optional)
# plt.title('Streets on which to apply the policies')
# plt.xlabel('Longitude')
# plt.ylabel('Latitude')
# plt.legend()
# plt.show()

## Amend here the number of subsets that we want to create. 

In [7]:
# Ensure 'all_connected_subsets' is defined
# Randomly select 20 tuples from the list
random_subset = set(random.sample(all_connected_subsets, 5000))

In [8]:
# Generate combinations of the form 1, (1,2), (1,2,3), ..., 2, (2,3), (2,3,4), ..., up to 20
for i in range(1, 21):
    random_subset.add((i))
    
random_subset.add((1,2,3,4))
random_subset.add((5,6,7))

In [9]:
random_subset

{1,
 2,
 3,
 4,
 5,
 6,
 7,
 (1, 3, 4, 5, 6, 7, 8, 9, 10, 13, 16, 19, 20),
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 (3, 4, 5, 6, 7, 8, 14, 15, 16, 17, 18, 19, 20),
 15,
 16,
 17,
 18,
 19,
 20,
 (1, 2, 4, 6, 7, 10, 11, 18, 20),
 (5, 6, 8, 9, 12, 13, 15, 16),
 (1, 3, 6, 8, 10, 13, 14, 15, 16, 19, 20),
 (1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 15, 17, 18, 20),
 (2, 4, 5, 8, 10, 16, 17, 18),
 (3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18),
 (1, 5, 6, 7, 12, 13, 20),
 (1, 2, 6, 7, 9, 10, 15, 17, 18, 19),
 (1, 2, 4, 5, 8, 9, 10, 14, 15, 17, 18),
 (1, 2, 3, 5, 6, 7, 10, 12, 13, 14, 18, 20),
 (1, 2, 6, 8, 9, 10, 11, 12, 13, 16, 17, 18, 19, 20),
 (2, 4, 5, 6, 7, 9, 11, 14, 19, 20),
 (1, 2, 5, 6, 7, 9, 11, 12, 13, 15, 16, 17, 18, 19, 20),
 (2, 4, 5, 6, 10, 13, 15, 16, 19),
 (1, 4, 5, 6, 7, 10, 11, 13, 17, 18, 19, 20),
 (1, 2, 3, 6, 7, 8, 11, 12, 13, 14, 16, 17, 18, 20),
 (1, 2, 8, 9, 13, 14, 15, 16, 17, 18, 19, 20),
 (1, 2, 4, 7, 8, 9, 10, 14, 15, 17, 18),
 (1, 2, 3, 4, 5, 7, 8, 9, 11, 12, 13, 14, 15, 1

In [11]:
# Save the random_subset list to a file using pickle
with open('pop_1pm_subset_5000_comb.pkl', 'wb') as file:
    pickle.dump(random_subset, file)

In [12]:
import os
import gc
import random
import pandas as pd
import gzip
import xml.etree.ElementTree as ET

# Directory to save the files
output_base_dir = '../../../data/pop_1pm_with_policies/networks/'

# Number of files per directory
files_per_dir = 100

# Function to write XML to a compressed .gz file
def write_xml_to_gz(xml_tree, file_path):
    with gzip.open(file_path, 'wb') as f:
        xml_tree.write(f, encoding='utf-8', xml_declaration=True)

# Ensure the base output directory exists
os.makedirs(output_base_dir, exist_ok=True)

# Create and save the networks
for i, combination in enumerate(random_subset):
    if isinstance(combination, int):
        combination = (combination,)
    df_copy = gdf_edges_with_highway.copy()
    df_copy['policy_introduced'] = df_copy['district'].apply(
        lambda districts: any(d in districts for d in combination)
    ) & df_copy.apply(
        lambda row: 'car' in row['modes'] and row['osm:way:highway'] in higher_order_roads, axis=1
    )
    # Modify freespeed and capacity based on the policy_introduced condition
    df_copy.loc[df_copy['policy_introduced'], 'capacity'] = df_copy.loc[df_copy['policy_introduced'], 'capacity'] / 2

    # Determine the subdirectory based on the file index
    dir_index = (i // files_per_dir) + 1
    subdir_name = f"networks_{dir_index * files_per_dir}"
    output_dir = os.path.join(output_base_dir, subdir_name)
    
    # Create the subdirectory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Determine the filename based on the combination
    if len(combination) == 1:
        filename = f"network_d_{combination[0]}.xml.gz"
    else:
        filename = f"network_d_{'_'.join(map(str, combination))}.xml.gz"
    
    # Convert the DataFrame to XML
    xml_tree = nio.dataframe_to_xml(df_copy, nodes_dict)
    
    # Write the XML to a compressed .gz file
    file_path = os.path.join(output_dir, filename)
    nio.write_xml_to_gz(xml_tree, file_path)

    # Clear the dataframe from memory
    del df_copy
    gc.collect()

# Example: Display the file paths of the saved files
output_files = [os.path.join(root, f) for root, _, files in os.walk(output_base_dir) for f in files]
print(output_files)

['../../../data/pop_1pm_with_policies/networks/.DS_Store', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_4_5_6_7_8_11_13_14_15_16_17_19_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_3_6_8_10_11_16_17_19.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_2_3_6_7_14_15_16.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_3_4_5_8_9_11_12_13_14_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_5_6_7_8_12_13_15_17_18_19_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_4_6_7_8_9_11_13_14_15_16_17_18_19_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_2_4_5_8_9_10_11_12_13_16_17_18_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_2_4_5_6_8_9_10_12_15_20.xml.gz', '../../../data/pop_1pm_with_policies/networks/networks_3100/network_d_1_4_5_

In [13]:
# dataframes = []
# for combination in random_subset:
#     # print(combination)
#     if isinstance(combination, int):
#         combination = (combination,)
#     df_copy = gdf_edges_with_highway.copy()
#     df_copy['policy_introduced'] = df_copy['district'].apply(
#         lambda districts: any(d in districts for d in combination)
#     ) & df_copy.apply(
#         lambda row: 'car' in row['modes'] and row['osm:way:highway'] in higher_order_roads, axis=1
#     )
#     # Modify freespeed and capacity based on the policy_introduced condition
#     df_copy.loc[df_copy['policy_introduced'], 'capacity'] = df_copy.loc[df_copy['policy_introduced'], 'capacity'] / 2
#     dataframes.append(df_copy)
#     # print(df_copy['policy_introduced'].value_counts())

In [14]:
# # Directory to save the files
# output_dir = '../../../data/pop_1pct_with_policies/networks/'
# os.makedirs(output_dir, exist_ok=True)

# # Create and save the networks
# for i, df in enumerate(dataframes):
#     # Determine the filename based on the combination
#     combination = random_subset[i]
#     if len(combination) == 1:
#         filename = f"network_d_{combination[0]}.xml.gz"
#     else:
#         filename = f"network_d_{'_'.join(map(str, combination))}.xml.gz"
    
#     # Convert the DataFrame back to XML
#     xml_tree = nio.dataframe_to_xml(df, nodes_dict)
    
#     # Write the XML to a compressed .gz file
#     file_path = os.path.join(output_dir, filename)
#     nio.write_xml_to_gz(xml_tree, file_path)

# # Example: Display the file paths of the saved files
# output_files = [os.path.join(output_dir, f) for f in os.listdir(output_dir)]

In [15]:
# import os
# import nio  # Assuming `nio` is a custom module or library you're using
# import random

# # Directory to save the files
# output_base_dir = '../../../data/pop_1pct_with_policies/networks/'

# # Number of files per directory
# files_per_dir = 100

# # Create and save the networks
# for i, df in enumerate(dataframes):
#     # Determine the subdirectory based on the file index
#     dir_index = (i // files_per_dir) + 1
#     subdir_name = f"networks_{dir_index * files_per_dir}"
#     output_dir = os.path.join(output_base_dir, subdir_name)
    
#     # Create the subdirectory if it doesn't exist
#     os.makedirs(output_dir, exist_ok=True)
    
#     # Determine the filename based on the combination
#     combination = random_subset[i]
#     if len(combination) == 1:
#         filename = f"network_d_{combination[0]}.xml.gz"
#     else:
#         filename = f"network_d_{'_'.join(map(str, combination))}.xml.gz"
    
#     # Convert the DataFrame back to XML
#     xml_tree = nio.dataframe_to_xml(df, nodes_dict)
    
#     # Write the XML to a compressed .gz file
#     file_path = os.path.join(output_dir, filename)
#     nio.write_xml_to_gz(xml_tree, file_path)

# # Example: Display the file paths of the saved files
# output_files = [os.path.join(root, f) for root, _, files in os.walk(output_base_dir) for f in files]
# print(output_files)
