# Comprehensive accessibility script

## 0. Variables definition and data import

In [23]:
# definitions
import sys
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode
from datetime import datetime,date,timedelta
import matplotlib.pyplot as plt
from itertools import product # needed for generating all combinations of O-D pairs
sys.argv.append(["--max-memory", "8G"])


data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"


# regional level files: (require previous editing)
oas_centroids_file = f"{data_folder}/processed/OA_centroids_TyneWear.gpkg" # used for population origin
oas_file = f"{data_folder}/processed/authorities/OA_TyneWear.gpkg" # needed for visualisation purposes
region_lads_file = f"{data_folder}/processed/authorities/LADs_tynewear.shp" # needed in order to filter greenspace data within the regional boundaries
workingplacezones_centroids_file = f"{data_folder}/processed/authorities/WPZ_centroids_tynewear.gpkg" # needed for destinations centroids coordinates
# greenspace_sites_file = f"{data_folder}/processed/accessibility/greenspace-sites_tynewear.gpkg" # needed for calcualting opportunities at greenspaces (area)
# greenspace_entrances_file = f"{data_folder}/processed/accessibility/accessTOgs_tynewear.gpkg" # needed for destinations centroids coordinates
greenspace_file = f"{data_folder}/processed/accessibility/greenspace_tynewear_edited.gpkg"
jobs_file = f"{data_folder}/processed/accessibility/wpz_tynewear_occupation_edited.csv"

# national level files
# greenspace_file = f"{data_folder}/raw/accessibility/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
osm_data_file = f"{data_folder}/raw/accessibility/tyne-and-wear-latest.osm.pbf"
gtfs_data_file = f"{data_folder}/raw/accessibility/itm_north_east_gtfs.zip"

In [24]:
# import

# origins (IE output areas, OAs)
oas_centroids = gpd.read_file(oas_centroids_file,
                              layer="OA_centroids_TyneWear")
oas_centroids['id'] = oas_centroids['OA11CD'] # Origin dataset must contain an 'id' column for r5py
oas_centroids_wgs84 = oas_centroids.to_crs("epsg:4326")
oas_centroids.head()

# destination data
# green space sites' entrances
gs_entrances = gpd.read_file(greenspace_file,
                        layer = "access_points")

gs_entrances.head() # Destination dataset already contains an 'id' column
# WPZ centroids
wpz_centroids = gpd.read_file(workingplacezones_centroids_file,
                              layer = "WPZ_centroids_tynewear")
wpz_centroids.head()
wpz_centroids['id'] = wpz_centroids['wz11cd'] # Destination dataset must contain an 'id' column for r5py

gs_sites = gpd.read_file(greenspace_file,
                         layer = "sites")

# network data
# uploaded in the sequent operation

# opportunities / land use data
jobs_per_wpz_df = pd.read_csv(jobs_file) # working place zones, population (as a proxy for n of jobs)
# note: opportunities column is called "pop"

In [25]:
gs_entrances.explore()

### CRS conversion

In [26]:
# Converting the original files' crs to GWS84, which is compatible with GTFS and OSM data
oas_centroids = oas_centroids.to_crs("epsg:4326")
gs_entrances = gs_entrances.to_crs("epsg:4326")
gs_sites = gs_sites.to_crs("epsg:4326")
wpz_centroids = wpz_centroids.to_crs("epsg:4326")

### Origins and destinations

In [5]:
wpz_centroids.head()

Unnamed: 0,OBJECTID,wz11cd,GlobalID,geometry,id
0,2,E33000251,{AF2BD35C-B624-4E2D-9C78-F26DF4FCABCE},POINT (-1.41992 54.91839),E33000251
1,3,E33000799,{8CB93749-3349-462C-93C7-B6E321CC765C},POINT (-1.61606 54.97382),E33000799
2,4,E33000257,{03204BF6-50A6-4AD1-855F-C7BBE6D8137B},POINT (-1.53272 54.90010),E33000257
3,5,E33000079,{53333BDF-9792-4370-94AB-BE7853FA2ACA},POINT (-1.62268 55.01104),E33000079
4,8,E33000174,{35114C58-FAA7-4E83-9724-ACED166052D5},POINT (-1.50942 55.02269),E33000174


In [6]:
oas_centroids.head()

Unnamed: 0,OBJECTID,OA11CD,GlobalID,geometry,id
0,126926,E00041377,c03c9813-26f3-41f9-85e5-d4cdf3742ca0,POINT (-1.60200 54.96055),E00041377
1,126927,E00041435,16e6607e-0b59-4f6f-8ec6-06a7396a70a5,POINT (-1.57717 54.89559),E00041435
2,126928,E00041745,4b5fa995-b251-4ee7-9a97-aef0a2598fe3,POINT (-1.56619 54.92993),E00041745
3,126929,E00041432,6e660884-3917-4e46-a693-bad0821318cb,POINT (-1.56719 54.89580),E00041432
4,126930,E00041742,0bfb7f06-a910-4fa2-8db1-e79d319ba232,POINT (-1.56562 54.93184),E00041742


In [7]:
gs_entrances.head()

Unnamed: 0,id,accessType,refToGreenspaceSite,geometry
0,idD93E3AB6-BDCE-483D-B3CF-4242FA90A0B7,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (-1.55733 55.03322)
1,id951F323D-8E88-4A5B-B9A4-37E0D69DD870,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (-1.56184 55.03333)
2,id0E14522B-427F-47C1-B043-BC3847ABE673,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (-1.56197 55.03340)
3,id548D0EAC-E6BE-4DFA-B90C-DB631A75309B,Pedestrian,idE56DE841-2BC6-13A9-E053-AAEFA00A0D0E,POINT (-1.55981 55.03343)
4,id0FECA8F4-6053-4147-A11D-62B01EC6C135,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (-1.55989 55.03344)


In [27]:
# for testing purposes:
n=99
origins = oas_centroids.loc[:n, :]
destinations = wpz_centroids.loc[:n, :]

# origins:
#   OAs
# destinations:
#   gs: entrances + OAs centroids
#   jobs: wpz centroids + OAs centroids


# destinations common fields: 'id', 'geometry'
# destinations = wpz_centroids
# simply concatenate the dataframes...
# need to keep the info on greenspace site's name to link with the entrances later on

# -------

# generate dataframe with all from_id and all to_id pairs
# (empty for now, to be filled up later on)
prod = product(origins['id'].unique(),
               destinations['id'].unique())
empty_ttm = pd.DataFrame(prod)
empty_ttm.columns = ['from_id', 'to_id']
empty_ttm.head()


Unnamed: 0,from_id,to_id
0,E00041377,E33000251
1,E00041377,E33000799
2,E00041377,E33000257
3,E00041377,E33000079
4,E00041377,E33000174


### Opportunities

In [9]:
# jobs: n of employees per WPZ
# greenspace: area of site


# add column with opportunity ... one for all?

## 1. Travel time matrix computation

### Generate the transport network

Compute the network starting from OSM and GTFS data

In [10]:
# load in transport network
transport_network = TransportNetwork(
    osm_data_file,
    [
        gtfs_data_file
    ]
)

### Create an empty matrix that contains all origins and destinations to be used later on

This table will be filled up once we calculate the ttm

In [11]:
# destinations
# gs: entrances + oas centroids
# jobs: wpz centroids + oas centroids
origins
from itertools import product
# generate dataframe with all from_id and all to_id pairs
prod = product(origins['id'].unique(),
               destinations['id'].unique())
empty_ttm = pd.DataFrame(prod)
empty_ttm.columns = ['from_id', 'to_id']
empty_ttm.head()

Unnamed: 0,from_id,to_id
0,E00041377,E33000251
1,E00041377,E33000799
2,E00041377,E33000257
3,E00041377,E33000079
4,E00041377,E33000174


### Travel time matrix

The following piece of code is split in 2:
- first part is definition of variables that will be inputted as parameters in the ttm computation
- second part is the loop to generate ttm for several transport modes

In [12]:
# defining variables
date_time = '2023,01,19,9,30' # CHOOSE BEST DATE/TIME
# max_time = dt.timedelta(seconds=900) # SET TO 15 MIN
walking_speed = 4.8
cycling_speed = 16
# dataframe to match legmode and transitmode objects (to be inputted in the ttm computer):
modes_lut = pd.DataFrame([
                          ['transit', TransitMode.TRANSIT, LegMode.WALK],
                          ['car', '', LegMode.CAR],
                          ['bicycle', '', LegMode.BICYCLE],
                          ['walk','', LegMode.WALK],
                         ],
                         columns = ('Mode', 'Transit_mode', 'Leg_mode'))

# function to generate custom list of transit+transport mode for the parameter transport_modes in TravelTimeMatrixComputer
def list_making(s,z):
    return [s] + [z]

ttm_complete = empty_ttm

# loop to compute a ttm for all the modes and generate one single ttm table in output
for row in modes_lut.itertuples():
    start_time = dt.datetime.now()
    mode = row.Mode
    transit_mode = row.Transit_mode
    leg_mode = row.Leg_mode
    transport_mode = list_making(transit_mode,leg_mode) # creating list of objects for transport_modes parameter

    print('The current mode is:', mode, ', transit is:', transit_mode, ', transport var is:', transport_mode)
    ttm_computer = TravelTimeMatrixComputer(
        transport_network,
        origins = origins,
        destinations = destinations,
        departure = dt.datetime.strptime(date_time, '%Y,%m,%d,%H,%M'),
        # max_time = max_time,
        speed_walking = walking_speed,
        speed_cycling = cycling_speed,
        transport_modes = transport_mode
    )

    ttm = ttm_computer.compute_travel_times()
    ttm = ttm.rename(columns = {'travel_time':f'time_{mode}'}) # renaming 'travel_time' column (automatically generated) to 'time_{mode of transport}'
    ttm.isna().sum() # checking for empty values, to see if the ttm actually calculated something
    #  merging the empty table generated before (with all possible origins and destinations) with the ttm, per each mode adding a travel time column
    ttm_complete = ttm_complete.merge(ttm,
                    how ='outer',
                    left_on = ['from_id','to_id'],
                    right_on = ['from_id','to_id'])
    
    print('finished calculating ttm for mode', mode)
    end_time = datetime.now()
    print('Duration for', mode, ': {}'.format(end_time - start_time))

The current mode is: transit , transit is: TransitMode.TRANSIT , transport var is: [<TransitMode.TRANSIT: <java object 'com.conveyal.r5.api.util.TransitModes'>>, <LegMode.WALK: <java object 'com.conveyal.r5.api.util.LegMode'>>]




finished calculating ttm for mode transit
Duration for transit : 0:00:09.970723
The current mode is: car , transit is:  , transport var is: ['', <LegMode.CAR: <java object 'com.conveyal.r5.api.util.LegMode'>>]
finished calculating ttm for mode car
Duration for car : 0:00:24.878739
The current mode is: bicycle , transit is:  , transport var is: ['', <LegMode.BICYCLE: <java object 'com.conveyal.r5.api.util.LegMode'>>]
finished calculating ttm for mode bicycle
Duration for bicycle : 0:00:20.614300
The current mode is: walk , transit is:  , transport var is: ['', <LegMode.WALK: <java object 'com.conveyal.r5.api.util.LegMode'>>]
finished calculating ttm for mode walk
Duration for walk : 0:00:02.117369


## 2. Accessibility calculation

Using [jamaps/tracc](https://github.com/jamaps/tracc) package

## Accessibility to jobs

In [16]:
ttm_jobs = ttm_complete # saving a copy of the matrix (the following operations will add columns to it, but we want to keep the original one also)

# generate tracc cost object
ttm_jobs_tracc = tracc.costs(ttm_jobs)

modes_list = ['transit',
              'car',
              'bicycle',
              'walk']

# empty dataframe to be filled up in the next for loop
acc_pot_jobs = origins[['id']]

for m in modes_list:
    # generate variable names to be used in the tracc function below
    cost_name = 'time_' + m
    travel_costs_ids = ["from_id","to_id"]
    supplyID = "wpz11cd"
    impedence_param = 15 # value for impedence function, to be changed as needed
    impedence_param_string = str(impedence_param)
    cost_output = 'cum_' + impedence_param_string + '_' + m
    acc_column_name = 'pot_cum_acc_' + impedence_param_string + '_' + m
    opportunity = "pop"
# Computing impedance function based on a 15 minute travel time threshold.
    ttm_jobs_tracc.impedence_calc(
        cost_column = cost_name,
        impedence_func = "cumulative",
        impedence_func_params = impedence_param, # to calculate n of jobs in n min threshold
        output_col_name = cost_output,
        prune_output = False
    )

# Setting up the accessibility object. This includes joining the destination data to the travel time data
    acc_jobs= tracc.accessibility(
            travelcosts_df = ttm_jobs_tracc.data,
            supply_df = jobs_per_wpz_df,
            travelcosts_ids = travel_costs_ids,
            supply_ids = supplyID
        )
    acc_jobs.data.head()

# Measuring potential accessibility to jobs, using a 45 minute cumulative impedance function
    # acc_pot_jobs = acc_jobs.potential(
    #         opportunity = "pop",
    #         impedence = cost_output,
    #         output_col_name= "pot_acc_" + cost_output
    #         )
    # the above function generate overwrite the column at every loop
    # so we reproduce the same function (from tracc documentation) per each mode:
    acc_jobs.data[acc_column_name] = acc_jobs.data[opportunity] * acc_jobs.data[cost_output]
    group_sum_bymode_acc = acc_jobs.data.groupby(acc_jobs.data[travel_costs_ids[0]])[[acc_column_name]].sum()
    acc_pot_jobs = acc_pot_jobs.merge(group_sum_bymode_acc,
                    how ='outer',
                    left_on = 'id',
                    right_on = 'from_id')

In [21]:
acc_pot_jobs.head()

Unnamed: 0,id,pot_cum_acc_15_transit,pot_cum_acc_15_car,pot_cum_acc_15_bicycle,pot_cum_acc_15_walk
0,E00041377,3453,40219,15398,0
1,E00041435,0,28116,0,0
2,E00041745,0,30796,0,0
3,E00041432,0,12546,0,0
4,E00041742,0,28190,0,0


In [None]:
# saving output to external file

In [14]:
# --------------
# EDIT TO WORK ON ALL MODES
# ---------------

# Computing impedance function based on a 15 minute travel time threshold.
ttm_jobs_tracc.impedence_calc(
    cost_column = "time_transit",
    impedence_func = "cumulative",
    impedence_func_params = 15, # to calculate n of jobs in 15 min
    output_col_name = "cum_15",
    prune_output = False
)

# Setting up the accessibility object. This includes joining the destination data to the travel time data
acc_jobs= tracc.accessibility(
        travelcosts_df = ttm_jobs_tracc.data,
        supply_df = jobs_per_wpz_df,
        travelcosts_ids = ["from_id","to_id"],
        supply_ids = "wpz11cd" # edit?
    )
acc_jobs.data.head()

# Measuring potential accessibility to jobs, using a 45 minute cumulative impedance function
acc_pot_jobs = acc_jobs.potential(
        opportunity = "pop",
        impedence = "cum_15"
        )


## Accessibility to greenspace

In [None]:
ttm_greenspace = ttm_complete # saving a copy of the matrix (the following operations will add columns to it, but we want to keep the original one also)


# generate tracc cost object
ttm_gs_tracc = tracc.costs(ttm_greenspace)

modes_list = ['transit',
              'car',
              'bicycle',
              'walk']

# empty dataframe to be filled up in the next for loop
acc_pot_gs = origins[['id']]

for m in modes_list:
    # generate variable names to be used in the tracc function below
    cost_name = 'time_' + m
    travel_costs_ids = ["from_id","to_id"]
    # supplyID = "wpz11cd" ## ---------------- TO BE CHANGED ACCORDINGLY __________
    impedence_param = 15 # value for impedence function, to be changed as needed
    impedence_param_string = str(impedence_param)
    cost_output = 'cum_' + impedence_param_string + '_' + m # naming depends on impedence function threshold
    acc_column_name = 'pot_cum_acc_' + impedence_param_string + '_' + m # naming depends on impedence function threshold
    opportunity = "pop"
# Computing impedence function based on a 15 minute travel time threshold.
    ttm_gs_tracc.impedence_calc(
        cost_column = cost_name,
        impedence_func = "cumulative",
        impedence_func_params = impedence_param, # to calculate n of jobs in n min threshold
        output_col_name = cost_output,
        prune_output = False
    )
# Setting up the accessibility object. This includes joining the destination data to the travel time data
# this needed to be done differently for greenspace, as opportunity is sites's area cumulative sum
# A. Filtering only rows with time travel within the threshold
    df_tracc_15min = ttm_gs_tracc.data[ttm_gs_tracc.data.loc[:,cost_output]==1]

#  ------------FROM HERE----------------------------------------------

# B. Filter entrances (only one per park)
    # associate park ID to entrances
    accesspoints_withPark = pd.merge(df_tracc_15min,
                                    accesspoints[['id','refToGreenspaceSite']],
                                    left_on='to_id',
                                    right_on='id',
                                    how='right'
                                    )
    oneaccess_perpark = accesspoints_withPark.sort_values("travel_time").drop_duplicates(["from_id", "refToGreenspaceSite"])
    oneaccess_perpark.head()
# C. Assign metric as sum[parks' area]
    # generate df with area per site
    oneaccess_perpark_witharea = pd.merge(oneaccess_perpark[['from_id', 'refToGreenspaceSite']],
                                        together_again[['id', 'area_m2']],
                                        left_on='refToGreenspaceSite',
                                        right_on='id'
                                        )
    # generate df with one row pr OA and sum of sites' areas
    OAs_metric = oneaccess_perpark_witharea.groupby(['from_id'])['area_m2'].sum().reset_index()

#  ----------------------------------------------------------

# Setting up the accessibility object. This includes joining the destination data to the travel time data
# this needed to be done differently for greenspace, as opportunity is sites's area cumulative sum
    acc_jobs= tracc.accessibility(
            travelcosts_df = ttm_jobs_tracc.data,
            supply_df = jobs_per_wpz_df,
            travelcosts_ids = travel_costs_ids,
            supply_ids = supplyID
        )
    acc_jobs.data.head()

# Measuring potential accessibility to jobs, using a 45 minute cumulative impedance function
    # acc_pot_jobs = acc_jobs.potential(
    #         opportunity = "pop",
    #         impedence = cost_output,
    #         output_col_name= "pot_acc_" + cost_output
    #         )
    # the above function generate overwrite the column at every loop
    # so we reproduce the same function (from tracc documentation) per each mode:
    acc_jobs.data[acc_column_name] = acc_jobs.data[opportunity] * acc_jobs.data[cost_output]
    group_sum_bymode_acc = acc_jobs.data.groupby(acc_jobs.data[travel_costs_ids[0]])[[acc_column_name]].sum()
    acc_pot_jobs = acc_pot_jobs.merge(group_sum_bymode_acc,
                    how ='outer',
                    left_on = 'id',
                    right_on = 'from_id')

In [22]:
acc_jobs.data.head()

Unnamed: 0,from_id,to_id,time_transit,time_car,time_bicycle,time_walk,cum_15_transit,cum_15_car,cum_15_bicycle,cum_15_walk,cum_15,wpz11cd,pop,pot_cum_acc_15_walk
0,E00041377,E33000251,69.0,17,68.0,,0,0,0,0,0,E33000251,656,0
1,E00041377,E33000799,15.0,7,9.0,26.0,1,1,1,0,1,E33000799,1118,0
2,E00041377,E33000257,39.0,16,50.0,116.0,0,0,0,0,0,E33000257,2842,0
3,E00041377,E33000079,30.0,11,29.0,77.0,0,1,0,0,0,E33000079,214,0
4,E00041377,E33000174,52.0,16,54.0,,0,0,0,0,0,E33000174,869,0


**A.** filter entrances (one per park)

**B.** assign metric as sum[park size]