In [None]:
import os
import matplotlib.pyplot as plt
import geopandas as gpd
import osmnx as ox
from geopandas import GeoDataFrame, GeoSeries
from osmnx import io
import glob

# CRS identifier string (EPSG:3857); defined here but not referenced in the cells shown.
project_crs = 'epsg:3857'

import warnings
import pandas as pd

# Suppress every warning from this point on (note: this also hides deprecation warnings).
warnings.filterwarnings(action='ignore')

# Parent directory of the current working directory; used below as the root
# under which the 'places/<place>' data folder is resolved.
pjr_loc = os.path.dirname(os.getcwd())


In [None]:
# For each place, pick the street names to be analysed manually (a reproducible
# 10% random sample of the simplified streets, or a previously saved selection),
# export the matching original OSM edges, and print one fill-in line per street.
# parameters
first_time = False  # True: draw a new sample; False: reload 'selected_streets.csv'
name = 'name'  # column holding the street name
for place in ['Tel_Aviv', 'San_Francisco__California', 'Turin_Italy']:
    if first_time:
        # Load the simplified network and keep only the simplified streets
        df = gpd.read_file(f'places/{place}/simp.shp')

        # Select 10% of the unique simplified street names (fixed seed for reproducibility)
        unique_names = df.loc[df['is_simplif'] == 1, name].unique()
        sampled_names = pd.Series(unique_names).sample(frac=0.1, random_state=42)
        # One row per sampled street, indexed by 'street', with an empty 'is_same' column
        selected_names = sampled_names.to_frame(name='street').assign(is_same='').set_index('street')
    else:
        selected_names = pd.read_csv(f'places/{place}/selected_streets.csv').set_index('Street name')[
            ['Status 1', 'Status 2']]

    # Save the selected original streets in new shp files (regular and dissolve)
    df_2 = gpd.read_file(f'places/{place}/osm_data.gpkg', layer='edges')
    if place == 'Tel_Aviv':
        # For Tel Aviv the English name column replaces the street name column
        df_2[name] = df_2['name:en']
        df_2.drop(columns='name:en', inplace=True)
    res = df_2[df_2[name].isin(selected_names.index)]
    # The regular (un-dissolved) edges get their own file. Previously both layers were
    # written to 'selected_streets.shp', so this one was overwritten straight away.
    res.to_file(f'places/{place}/selected_streets_edges.shp')
    # The dissolved layer keeps the original output path.
    res.dissolve(by=name).sort_values(by=name).to_file(f'places/{place}/selected_streets.shp')
    # Create df to be filled in manually
    code_to_repeat = "selected_names.loc['',:]= []"  # template of the line printed per street

    # Print one copy of the template for every selected street
    for street in selected_names.index:
        print(f"selected_names.loc['{street}',:]= []")

In [None]:
# Data for figure 8.
# Tuples noted for the places:
#   ('Tel_Aviv', 'Tel_Aviv')
#   ('Turin_Italy', 'Turin')
#   ('San_Francisco__California', 'San_Francisco')

# Choose the location and create its output folder if necessary
place = 'Turin_Italy'
data_folder = os.path.join(pjr_loc, f'places/{place}')
# The csv files will be stored here
os.makedirs(f'{data_folder}/csv/', exist_ok=True)

# Network read from 'before_df.shp'
before_df = gpd.read_file(f'{data_folder}/before_df.shp')
# Network read from 'network_new.shp', with its 'index' column removed
network_new = gpd.read_file(f'{data_folder}/network_new.shp')
after_df = network_new.drop(columns='index')


# step 1-4 as one def
def street_its_connections(network, is_simplified=False):
    """
    Map every street name to the names of the streets it is spatially joined with.

    The network is joined with itself via ``gpd.sjoin``, pairs whose two sides
    carry the same name are dropped, and the remaining pairs are grouped by the
    left-hand street name.

    :param network: table with a 'name' column; when ``is_simplified`` is True it
        must also have an 'is_simplif' column.
    :param is_simplified: when True, the stored flag tells whether any joined row
        of the street has ``is_simplif == 1``; when False the flag is always False.
    :return: dict of ``{street name: (list of connected street names, flag)}``.
    """
    # Column names produced by the self join for the shared 'name' column
    str_name = 'name_left'
    con_str_name = 'name_right'
    # 1-2. Join a copy of the network with the network itself
    joined = gpd.sjoin(network.copy(), network)
    # 3. Delete the pairs where both sides have the same name
    different_names = joined[joined[str_name] != joined[con_str_name]]
    # 4. For each street name: (unique connected names, flag).
    # Iterating over the groups gives the key directly, instead of reading the
    # grouping column inside ``groupby.apply`` (which newer pandas versions
    # no longer pass to the applied function).
    dic_data = {}
    for street, str_lines in different_names.groupby(str_name):
        if is_simplified:
            # after SOD process we also want to know if all/part of the street was simplified
            flag = bool((str_lines['is_simplif_left'] == 1).any())
        else:
            flag = False
        dic_data[street] = (list(str_lines[con_str_name].unique()), flag)
    return dic_data


# Build the street -> (connected streets, flag) dictionaries for both networks
dic_before = street_its_connections(before_df)
print('dic_before')
dic_after = street_its_connections(after_df, is_simplified=True)
print('dic_after')
# 5. Turn both dictionaries into tables so they can be compared row by row
connection_columns = ['ConnectedStreets', 'is_simplified']
df1 = pd.DataFrame(list(dic_after.values()), index=list(dic_after.keys()), columns=connection_columns)
df1 = df1.reset_index(names='StreetName')
df2 = pd.DataFrame(list(dic_before.values()), index=list(dic_before.keys()), columns=connection_columns)
df2 = df2.reset_index(names='StreetName')


def common_streets(row):
    """
    Count the connected streets that the row's street shares with its entry in df2.

    Parameters:
    row (pandas.Series): A row providing 'StreetName' and 'ConnectedStreets'.

    Returns:
    int: Size of the intersection between the row's connected streets and those of the
    first df2 entry with the same 'StreetName'; 0 when df2 has no such entry.
    """
    # Connected-street lists stored in df2 for this street name
    previous = df2.loc[df2['StreetName'] == row['StreetName'], 'ConnectedStreets']

    # No entry with this name in df2
    if len(previous) == 0:
        return 0

    # Only the first matching entry is compared
    return len(set(row['ConnectedStreets']).intersection(previous.iloc[0]))


def unique_streets_in_df1(row):
    """
    Count the row's connected streets that are absent from the street's entry in df2.

    Parameters:
    row (pandas.Series): A row providing 'StreetName' and 'ConnectedStreets'.

    Returns:
    int: Number of connected streets present in the row but not in the first df2 entry
    with the same 'StreetName'; 0 when df2 has no such entry.
    """
    # Connected-street lists stored in df2 for this street name
    previous = df2.loc[df2['StreetName'] == row['StreetName'], 'ConnectedStreets']

    # No entry with this name in df2
    if len(previous) == 0:
        return 0

    # Streets connected in the row but not in the first matching df2 entry
    return len(set(row['ConnectedStreets']).difference(previous.iloc[0]))


def unique_streets_in_df2(row):
    """
    Count the connected streets of the street's df2 entry that are absent from the row.

    Parameters:
    row (pandas.Series): A row providing 'StreetName' and 'ConnectedStreets'.

    Returns:
    int: Number of connected streets present in the first df2 entry with the same
    'StreetName' but not in the row; 0 when df2 has no such entry.
    """
    # Connected-street lists stored in df2 for this street name
    previous = df2.loc[df2['StreetName'] == row['StreetName'], 'ConnectedStreets']

    # No entry with this name in df2
    if len(previous) == 0:
        return 0

    # Streets connected in the first matching df2 entry but not in the row
    return len(set(previous.iloc[0]).difference(row['ConnectedStreets']))


# Per-street counts comparing the connected streets in df1 with those in df2
df1['CommonStreetsCount'] = df1.apply(common_streets, axis=1)
df1['UniqueStreetsInAfterCount'] = df1.apply(unique_streets_in_df1, axis=1)
df1['UniqueStreetsInBeforeCount'] = df1.apply(unique_streets_in_df2, axis=1)

# Calculate rate success: common connections as a percentage of all connections
# counted in either table (computed once; the statement used to be duplicated).
# A street whose three counts are all zero gets NaN (0 / 0).
count_columns = ['CommonStreetsCount', 'UniqueStreetsInAfterCount', 'UniqueStreetsInBeforeCount']
df1['rate_success'] = (df1['CommonStreetsCount'] / df1[count_columns].sum(axis=1) * 100).round(0)

# Update the original data source: attach the rate to every row of the street
# (aligned on the street name), then dissolve to one feature per street name
after_df2 = after_df.set_index('name')
after_df2['rate_success'] = df1.set_index('StreetName')['rate_success']
after_df2 = after_df2.reset_index().dissolve(by='name')
after_df2.to_file(f'{data_folder}/after_df.shp')
df1.to_csv(f'{data_folder}/csv/after_df.csv')
df2.to_csv(f'{data_folder}/csv/before_df.csv')

# Summary: mean rate over all streets, then split by the 'is_simplified' flag
rate_success = 'rate_success'
is_simplified = 'is_simplified'
print(f'mean: {df1[rate_success].mean()}')
print(f'mean_simplified: {df1[df1[is_simplified]][rate_success].mean()}')
print(f'mean_no_simplified: {df1[~df1[is_simplified]][rate_success].mean()}')