# Region_access d2i and i2d time calculation

Main goal: calculate the d2i and i2d travel times using the population-weighted centroids of the regions.

We use the OSRM API, which calculates the fastest driving time between two coordinates.

## 1 - Import libraries and documents needed

In [1]:
import pandas as pd
from typing import Dict
import requests
import os
# Print the current working directory (useful to know where relative paths resolve).
print(os.getcwd())
# Remove the column limit so wide DataFrames are displayed with all their columns.
pd.set_option('display.max_columns', None)

c:\Users\LMENENDEZ\GitHub\MultiModX\notebooks


In [2]:
# Input tables, all read from the v=0.7 case-study folder:
#   region_access        - region/station pairs with their current avg_d2i / avg_i2d values
#   centroid_populations - centroid of each NUTS region
#   rail_stops           - rail stops (stop_id, stop_lat, stop_lon, ...)
#   air_stops            - airport coordinates
region_access = pd.read_csv(
    r"G:\Unidades compartidas\04_PROYECTOS I+D+i\2023 MultiModX\iii) Project\WP3 Scenario definition\Case study input data\Spain\v=0.7\infrastructure\regions_access\regions_access_v0.2.csv"
)
centroid_populations = pd.read_csv(
    r"G:\Unidades compartidas\04_PROYECTOS I+D+i\2023 MultiModX\iii) Project\WP3 Scenario definition\Case study input data\Spain\v=0.7\infrastructure\regions_access\centroid_populations.csv"
)
rail_stops = pd.read_csv(
    r"G:\Unidades compartidas\04_PROYECTOS I+D+i\2023 MultiModX\iii) Project\WP3 Scenario definition\Case study input data\Spain\v=0.7\infrastructure\rail_info\stops.txt"
)
air_stops = pd.read_csv(
    r"G:\Unidades compartidas\04_PROYECTOS I+D+i\2023 MultiModX\iii) Project\WP3 Scenario definition\Case study input data\Spain\v=0.7\infrastructure\airports_info\airports_coordinates_v1.1.csv"
)

## 2 - Format documents

In [3]:
# Build the IATA -> ICAO lookup table.
# Airports without an ICAO code fall back to their IATA code, so every IATA key maps to a value.
airports = pd.read_csv(
    r"G:\Unidades compartidas\04_PROYECTOS I+D+i\2023 MultiModX\iii) Project\WP3 Scenario definition\Case study input data\Spain\v=0.7\infrastructure\airports_info\IATA_ICAO_Airport_codes_v1.3.csv"
)
airports["ICAO"] = airports["ICAO"].fillna(airports["IATA"])
iata_to_icao = dict(zip(airports["IATA"], airports["ICAO"]))

In [4]:
def format_regions_access(
    regions_access: pd.DataFrame,
    iata_to_icao: Dict,
    excluded_regions=("ES530", "ES701", "ES702"),
) -> pd.DataFrame:
    """Normalise station codes to ICAO and drop the regions that are not needed.

    The frame is modified in place and also returned for convenience.

    Args:
        regions_access: Table with at least the columns ``station`` and ``region``.
        iata_to_icao: Mapping from IATA code to ICAO code. Stations that are not
            keys of the mapping (e.g. rail stop ids) are left unchanged.
        excluded_regions: Region codes whose rows are removed. Defaults to
            ES530, ES701 and ES702, which are irrelevant for this study.

    Returns:
        The same ``regions_access`` object, after modification.
    """
    # map() yields NaN for codes missing from the dictionary; fall back to the original code.
    regions_access["station"] = (
        regions_access["station"].map(iata_to_icao).fillna(regions_access["station"])
    )
    # Operate on the argument itself, not on a module-level variable, so the
    # function works on whichever frame the caller passes in.
    excluded_rows = regions_access.index[
        regions_access["region"].isin(list(excluded_regions))
    ]
    regions_access.drop(index=excluded_rows, inplace=True)
    return regions_access

In [5]:
# region_access is modified in place; the returned frame is only used as the cell's display output.
format_regions_access(region_access,iata_to_icao)

Unnamed: 0,region,station,layer,pax_type,avg_d2i,avg_i2d
0,ES111,LEST,air,all,47,47
1,ES111,LECO,air,all,31,31
2,ES111,LEVX,air,all,103,103
3,ES112,LEST,air,all,85,85
4,ES113,LEST,air,all,144,144
...,...,...,...,...,...,...
203,ES618,007151003,rail,all,56,56
204,ES618,007102002,rail,all,110,110
205,ES620,007161200,rail,all,39,39
206,ES620,007162002,rail,all,59,59


In [6]:
def format_centroid(centroid_populations: pd.DataFrame):
    """Split the ``Centroid`` text column into numeric ``lat`` and ``lon`` columns.

    Each ``Centroid`` value is expected to look like ``"(lon,lat)"``: the first
    component becomes ``lon`` and the second becomes ``lat``, both as floats.
    The ``Centroid`` column is removed afterwards. The frame is modified in
    place and also returned.
    """
    # Remove the surrounding parentheses and split on the comma once; reuse the pieces.
    components = centroid_populations['Centroid'].str.strip("()").str.split(",")
    latitude = components.str[1].astype(float)
    longitude = components.str[0].astype(float)

    centroid_populations['lat'] = latitude
    centroid_populations['lon'] = longitude
    centroid_populations.drop(columns=['Centroid'], inplace=True)
    return centroid_populations

In [7]:
# centroid_populations is modified in place; the returned frame is only used as the cell's display output.
format_centroid(centroid_populations)

Unnamed: 0,NUTS,lat,lon
0,DE933,53.354842,9.968232
1,DE934,53.029162,11.098782
2,DE935,53.256547,10.443584
3,DEA5A,50.905318,8.070128
4,DEA5B,51.589680,8.190063
...,...,...,...
455,DEG0P,50.874071,10.204370
456,ES242,40.712690,-0.760620
457,DE402,51.760935,14.327932
458,ES422,38.991908,-3.607724


In [8]:
def format_rail_stops(rail_stops: pd.DataFrame):
    """Convert ``stop_id`` to text and prefix it with two zeros.

    The frame is modified in place and also returned. Note that calling this
    twice on the same frame adds the prefix twice.
    """
    stop_ids_as_text = rail_stops['stop_id'].astype(str)
    rail_stops['stop_id'] = '00' + stop_ids_as_text
    return rail_stops

In [9]:
# rail_stops is modified in place; the returned frame is only used as the cell's display output.
format_rail_stops(rail_stops)

Unnamed: 0,stop_id,stop_name,stop_lat,stop_lon,location_type,parent_station,stop_timezone
0,007100367,PORT BOU(fr),42.431667,3.189722,5,,Europe/Amsterdam
1,007101003,EL ARAHAL,37.268056,-5.548611,1,,Europe/Amsterdam
2,007101005,MARCHENA,37.334167,-5.425556,1,,Europe/Amsterdam
3,007101007,OSUNA,37.233889,-5.115000,1,,Europe/Amsterdam
4,007101009,PEDRERA,37.222500,-4.893611,1,,Europe/Amsterdam
...,...,...,...,...,...,...,...
833,009457000,PORTALEGRE,39.199167,-7.461667,1,,Europe/Lisbon
834,009457117,ASSUMAR,39.138611,-7.381944,1,,Europe/Lisbon
835,009457174,ARRONCHES,39.096944,-7.333333,1,,Europe/Lisbon
836,009457315,SANTA EULALIA-A,39.006111,-7.253333,1,,Europe/Lisbon


## 3 - The main code

In [10]:
def get_OSRM_time(lonA, latA, lonB, latB, timeout=30):
    """Return the driving time from point A to point B, in minutes.

    The route is requested from the public OSRM demo server. OSRM reports the
    route ``duration`` in seconds (per its HTTP API documentation), so the
    value is divided by 60.

    Args:
        lonA, latA: Longitude and latitude of the origin.
        lonB, latB: Longitude and latitude of the destination.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        Duration of the first route returned by OSRM, in minutes (float).

    Raises:
        RuntimeError: If the response is not JSON, or OSRM does not answer
            with code ``Ok`` and at least one route.
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    # OSRM expects coordinates as "lon,lat" pairs separated by ";".
    locations = '{},{};{},{}'.format(lonA, latA, lonB, latB)
    url = 'http://router.project-osrm.org/route/v1/driving/' + locations

    response = requests.get(url, timeout=timeout)

    # Parse the body once; error pages (e.g. from rate limiting) may not be JSON.
    try:
        payload = response.json()
    except ValueError as err:
        raise RuntimeError(
            'OSRM returned a non-JSON response (HTTP {}) for {}'.format(
                response.status_code, locations
            )
        ) from err

    # Fail with the server's own explanation instead of a KeyError on 'routes'.
    if payload.get('code') != 'Ok' or not payload.get('routes'):
        raise RuntimeError(
            'OSRM could not compute a route for {}: {}'.format(locations, payload)
        )
    return payload['routes'][0]['duration'] / 60

In [11]:
# Build merged_all_stations: one row per region/station pair carrying the
# coordinates of the region centroid (latd, lond) and of the station (lati, loni).

# 1) Attach the centroid coordinates (lat, lon) of each region.
merged_df = pd.merge(
    region_access, centroid_populations, how='left', left_on='region', right_on='NUTS'
)

# 2) Attach rail stop coordinates; rows whose station is not a rail stop get NaN.
merged_df_rail_stations = pd.merge(
    merged_df,
    rail_stops.loc[:, ["stop_id", "stop_lat", "stop_lon"]],
    how='left',
    left_on='station',
    right_on='stop_id',
)

# 3) Attach airport coordinates. The overlapping lat/lon column names get the
#    default suffixes: *_x for the centroid columns, *_y for the airport columns.
merged_df_all_stations = pd.merge(
    merged_df_rail_stations, air_stops, how='left', left_on='station', right_on='icao_id'
)

# 4) Drop the join keys, fill the missing rail coordinates with the airport
#    ones, then give the coordinate columns their final names.
merged_all_stations = (
    merged_df_all_stations
    .drop(columns=["NUTS", "stop_id", "icao_id"])
    .assign(
        stop_lat=lambda frame: frame["stop_lat"].fillna(frame["lat_y"]),
        stop_lon=lambda frame: frame["stop_lon"].fillna(frame["lon_y"]),
    )
    .drop(columns=["lat_y", "lon_y"])
    .rename(columns={"lat_x": "latd", "lon_x": "lond", "stop_lat": "lati", "stop_lon": "loni"})
)

In [12]:
# Compute the door-to-infrastructure (d2i) and infrastructure-to-door (i2d)
# driving times for every region/station pair and store them in a new table.

# Fail early, before issuing hundreds of HTTP requests, if the inputs cannot
# produce a valid result.
coordinate_columns = ["lond", "latd", "loni", "lati"]
missing_coordinates = merged_all_stations[coordinate_columns].isna().any(axis=1)
if missing_coordinates.any():
    raise ValueError(
        "Missing coordinates for these region/station pairs: {}".format(
            merged_all_stations.loc[missing_coordinates, ["region", "station"]].values.tolist()
        )
    )
if len(merged_all_stations) != len(region_access):
    # A merge produced extra rows (duplicated keys), so the times could not be
    # written back to region_access row by row.
    raise ValueError(
        "merged_all_stations has {} rows but region_access has {}".format(
            len(merged_all_stations), len(region_access)
        )
    )

d2i_list = []
i2d_list = []
# Iterate over the coordinate columns directly; this does not depend on the
# frame having a default 0..n-1 index.
for lond, latd, loni, lati in zip(
    merged_all_stations["lond"],
    merged_all_stations["latd"],
    merged_all_stations["loni"],
    merged_all_stations["lati"],
):
    # Round to the nearest integer as we do not need that much precision.
    d2i_list.append(round(get_OSRM_time(lond, latd, loni, lati)))  # centroid -> station
    i2d_list.append(round(get_OSRM_time(loni, lati, lond, latd)))  # station -> centroid

merged_all_stations["new_d2i"] = d2i_list
merged_all_stations["new_i2d"] = i2d_list

# Work on a copy so the original region_access keeps its previous values and
# region_access_new really is a separate table.
region_access_new = region_access.copy()
region_access_new["avg_d2i"] = d2i_list
region_access_new["avg_i2d"] = i2d_list

In [None]:
# region_access_new.to_csv("regions_access_v0.3.csv")