# Comprehensive accessibility script

## 0. Variables definition and data import

In [1]:
# definitions
import sys

# Register the r5py memory option as two separate string arguments.
# `sys.argv` is a flat list of strings: `append([...])` would add ONE nested
# list element, whereas `extend([...])` adds the flag and its value.
# NOTE(review): r5py's documentation sets this option before `import r5py`,
# so it is placed ahead of the r5py import below — confirm against the
# installed r5py version.
sys.argv.extend(["--max-memory", "8G"])

import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode
from datetime import datetime,date,timedelta
import matplotlib.pyplot as plt
from itertools import product # needed for generating all combinations of O-D pairs


# Root folder that every input path below is built from.
# NOTE(review): this is an absolute, user-specific path — it has to be changed
# before the notebook can run on another machine.
data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"


# regional level files: (require previous editing)
# NOTE(review): the OA centroids file sits directly under processed/, while the
# other authority layers are under processed/authorities/ — confirm this is intended.
oas_centroids_file = f"{data_folder}/processed/OA_centroids_TyneWear.gpkg" # output-area centroids, used as population origins
oas_file = f"{data_folder}/processed/authorities/OA_TyneWear.gpkg" # output-area polygons, needed for visualisation purposes
region_lads_file = f"{data_folder}/processed/authorities/LADs_tynewear.shp" # needed in order to filter greenspace data within the regional boundaries
workingplacezones_centroids_file = f"{data_folder}/processed/authorities/WPZ_centroids_tynewear.gpkg" # needed for destinations centroids coordinates
# Superseded inputs, kept for reference (both are replaced by greenspace_file below):
# greenspace_sites_file = f"{data_folder}/processed/accessibility/greenspace-sites_tynewear.gpkg" # needed for calculating opportunities at greenspaces (area)
# greenspace_entrances_file = f"{data_folder}/processed/accessibility/accessTOgs_tynewear.gpkg" # needed for destinations centroids coordinates
greenspace_file = f"{data_folder}/processed/accessibility/greenspace_tynewear_edited.gpkg" # read twice in the import cell: layers "access_points" and "sites"
jobs_file = f"{data_folder}/processed/accessibility/wpz_tynewear_occupation_edited.csv" # per-WPZ opportunities table, read with pandas

# national level files
# greenspace_file = f"{data_folder}/raw/accessibility/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
osm_data_file = f"{data_folder}/raw/accessibility/tyne-and-wear-latest.osm.pbf" # OSM extract passed to TransportNetwork
gtfs_data_file = f"{data_folder}/raw/accessibility/itm_north_east_gtfs.zip" # GTFS feed passed to TransportNetwork

In [2]:
# Load every input layer used in the notebook.

# --- origins: output areas (OAs) ---
oas_centroids = gpd.read_file(oas_centroids_file, layer="OA_centroids_TyneWear")
# r5py requires an 'id' column on the origin dataset: reuse the OA code
oas_centroids['id'] = oas_centroids['OA11CD']

# --- destinations ---
# greenspace entrances (per the original note, this layer already has an 'id' column)
gs_entrances = gpd.read_file(greenspace_file, layer="access_points")
# workplace-zone (WPZ) centroids; r5py requires an 'id' column here as well
wpz_centroids = gpd.read_file(workingplacezones_centroids_file, layer="WPZ_centroids_tynewear")
wpz_centroids['id'] = wpz_centroids['wz11cd']

# greenspace sites, from the same file as the entrances
gs_sites = gpd.read_file(greenspace_file, layer="sites")

# --- network data ---
# loaded in a later cell, when the transport network is built

# --- opportunities / land use data ---
# one row per working place zone, with population as a proxy for the number of jobs
# note: the opportunities column is called "pop"
jobs_per_wpz_df = pd.read_csv(jobs_file)

In [None]:
# Interactive map of the greenspace entrance points, as a visual check of the imported layer
gs_entrances.explore()

### CRS conversion

In [3]:
# Reproject the point layers to WGS84 (EPSG:4326), which is compatible with the GTFS and OSM data.
# The two destination layers are overwritten; the OA centroids get a new, suffixed name.
wpz_centroids = wpz_centroids.to_crs("epsg:4326")
gs_entrances = gs_entrances.to_crs("epsg:4326")
oas_centroids_wgs84 = oas_centroids.to_crs("epsg:4326")
# gs_sites is deliberately NOT reprojected: per the original note it stays in epsg:27700,
# because a projected CRS is needed to calculate the site areas.

### Origins and destinations

In [None]:
# Preview the OA centroids. Note this is the frame as imported; the reprojected copy
# used for routing is oas_centroids_wgs84.
oas_centroids.head()

In [None]:
# Preview the workplace-zone centroids (already reprojected to epsg:4326 in the CRS cell)
wpz_centroids.head()

In [None]:
# Preview the greenspace entrances (already reprojected to epsg:4326 in the CRS cell)
gs_entrances.head()

In [4]:

# Routing inputs
#   origins:       OA centroids
#   destinations:  OA centroids + WPZ centroids + greenspace entrances
#     (greenspace measure uses entrances + OA centroids,
#      jobs measure uses WPZ centroids + OA centroids)

origins = oas_centroids_wgs84

# The three layers share the fields 'id' and 'geometry', so they are stacked into
# one table with a fresh 0..n-1 index. 'refToGreenspaceSite' is carried along for
# the entrances so that they can be linked back to their greenspace site later on.
destinations = pd.concat(
    [
        oas_centroids_wgs84[['id', 'geometry']],
        wpz_centroids[['id', 'geometry']],
        gs_entrances[['id', 'geometry', 'refToGreenspaceSite']],
    ],
    ignore_index=True,
)




### Opportunities

In [None]:
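# Opportunities used by the accessibility measures:
#   jobs: number of employees per WPZ
#   greenspace: area of the site
#
# TODO: add a column with the opportunity value to the destinations — one shared column for all destination types?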

## 1. Travel time matrix computation

### Generate the transport network

Compute the network starting from OSM and GTFS data

In [5]:
# Build the transport network from the OSM extract and a list of GTFS feeds (a single feed here)
transport_network = TransportNetwork(osm_data_file, [gtfs_data_file])

### Create an empty matrix that contains all origins and destinations to be used later on

This table will be filled up once we calculate the ttm

In [None]:
# Optional sub-sampling for test runs (skip this cell for a full-size run).
# Rows are drawn at random instead of taking the first k, so that the destination
# sample is not limited to whichever layer happens to come first in the table
# (OA centroids, then WPZ centroids, then greenspace entrances).
k = 1000          # maximum number of origins / destinations to keep
sample_seed = 42  # fixed seed: re-running the notebook selects the same rows

# min(...) keeps the request within the number of available rows;
# DataFrame.sample raises when asked for more rows than exist without replacement.
origins = origins.sample(n=min(k, len(origins)), random_state=sample_seed)
destinations = destinations.sample(n=min(k, len(destinations)), random_state=sample_seed)

In [6]:
# Table of every (origin id, destination id) combination.
# It holds only the two id columns for now; the travel times are merged in later.
prod = product(origins['id'].unique(), destinations['id'].unique())

empty_ttm = pd.DataFrame(prod)
empty_ttm.columns = ['from_id', 'to_id']

empty_ttm.head()

Unnamed: 0,from_id,to_id
0,E00041377,E33000251
1,E00041377,E33000799
2,E00041377,E33000257
3,E00041377,E33000079
4,E00041377,E33000174


### Travel time matrix

The following code is split into two parts:
- the first part defines the variables that are passed as parameters to the travel time matrix (ttm) computation
- the second part is the loop that generates a ttm for each transport mode

In [None]:
# Parameters for the travel time matrix computation
date_time = '2023,01,19,9,30' # CHOOSE BEST DATE/TIME
# max_time = dt.timedelta(seconds=900) # SET TO 15 MIN
walking_speed = 4.8
cycling_speed = 16

# Lookup table pairing each mode label with the TransitMode / LegMode objects
# handed to the ttm computer. '' marks modes that have no transit component.
modes_lut = pd.DataFrame(
    [
        ('transit', TransitMode.TRANSIT, LegMode.WALK),
        ('car', '', LegMode.CAR),
        ('bicycle', '', LegMode.BICYCLE),
        ('walk', '', LegMode.WALK),
    ],
    columns=['Mode', 'Transit_mode', 'Leg_mode'],
)

# function to generate custom list of transit+transport mode for the parameter transport_modes in TravelTimeMatrixComputer
def list_making(s, z):
    """Return the list of modes to pass as ``transport_modes``.

    Parameters
    ----------
    s : object
        Transit-mode entry, or a placeholder (``''`` or ``None``) when the
        mode has no transit component.
    z : object
        Leg-mode entry.

    Returns
    -------
    list
        ``[s, z]`` in that order, with placeholder entries left out.
    """
    # The modes lookup table uses '' to mean "no transit mode"; dropping the
    # placeholder keeps non-mode values out of the transport_modes list.
    return [mode for mode in (s, z) if mode is not None and mode != '']

# Start from the table of all O-D pairs; each pass of the loop merges in one
# 'time_<mode>' column (merge returns a new frame, so empty_ttm is left untouched).
ttm_complete = empty_ttm

# The departure time is the same for every mode, so it is parsed once, outside the loop.
departure_time = dt.datetime.strptime(date_time, '%Y,%m,%d,%H,%M')

# loop to compute a ttm for all the modes and generate one single ttm table in output
for row in modes_lut.itertuples():
    start_time = dt.datetime.now()
    mode = row.Mode
    transit_mode = row.Transit_mode
    leg_mode = row.Leg_mode
    transport_mode = list_making(transit_mode, leg_mode) # list of objects for the transport_modes parameter

    print('The current mode is:', mode, ', transit is:', transit_mode, ', transport var is:', transport_mode)
    ttm_computer = TravelTimeMatrixComputer(
        transport_network,
        origins = origins,
        destinations = destinations,
        departure = departure_time,
        # max_time = max_time,
        speed_walking = walking_speed,
        speed_cycling = cycling_speed,
        transport_modes = transport_mode
    )

    ttm = ttm_computer.compute_travel_times()
    # rename the generated 'travel_time' column to 'time_{mode of transport}'
    ttm = ttm.rename(columns = {'travel_time':f'time_{mode}'})
    # Missing values per column, to see whether the ttm actually calculated something.
    # Printed explicitly: a bare expression inside a loop is never displayed.
    print('Missing values per column for mode', mode)
    print(ttm.isna().sum())
    # merge this mode's travel times into the table of all origins and destinations
    ttm_complete = ttm_complete.merge(ttm,
                    how ='outer',
                    left_on = ['from_id','to_id'],
                    right_on = ['from_id','to_id'])

    print('finished calculating ttm for mode', mode)
    end_time = dt.datetime.now()
    print('Duration for', mode, ': {}'.format(end_time - start_time))

## 2. Accessibility calculation

Using [jamaps/tracc](https://github.com/jamaps/tracc) package

### Accessibility to jobs

In [None]:
# Cumulative-opportunity accessibility to jobs, per origin and per transport mode.
# Result: acc_pot_jobs, starting from the origin ids and gaining one
# 'pot_cum_acc_<threshold>_<mode>' column per pass of the loop below.
ttm_jobs = ttm_complete.copy(deep=True) # saving a copy of the matrix (the following operations will add columns to it, but we want to keep the original one also)

# generate tracc cost object
ttm_jobs_tracc = tracc.costs(ttm_jobs)

modes_list = ['transit',
              'car',
              'bicycle',
              'walk']

# dataframe holding only the origin ids, to be filled up in the next for loop
acc_pot_jobs = origins[['id']]

for m in modes_list:
    # generate variable names to be used in the tracc function below
    cost_name = 'time_' + m # travel time column produced by the ttm loop for this mode
    travel_costs_ids = ["from_id","to_id"]
    # NOTE(review): the WPZ centroids layer uses the column 'wz11cd' for the zone code;
    # confirm that the jobs table really names it 'wpz11cd'.
    supplyID = "wpz11cd"
    impedence_param = 15 # value for impedence function, to be changed as needed
    impedence_param_string = str(impedence_param)
    cost_output = 'cum_' + impedence_param_string + '_' + m
    acc_column_name = 'pot_cum_acc_' + impedence_param_string + '_' + m
    opportunity = "pop" # opportunities column of the jobs table
    # Computing the cumulative impedance function for the travel time threshold
    # given by impedence_param (15 minutes as currently set).
    # The result is read back below from ttm_jobs_tracc.data under the name cost_output.
    ttm_jobs_tracc.impedence_calc(
        cost_column = cost_name,
        impedence_func = "cumulative",
        impedence_func_params = impedence_param, # to calculate n of jobs in n min threshold
        output_col_name = cost_output,
        prune_output = False
    )

    # Setting up the accessibility object. This includes joining the destination data to the travel time data.
    # It is rebuilt on every pass so that it sees the impedance column just added.
    acc_jobs= tracc.accessibility(
            travelcosts_df = ttm_jobs_tracc.data,
            supply_df = jobs_per_wpz_df,
            travelcosts_ids = travel_costs_ids,
            supply_ids = supplyID
        )
    acc_jobs.data.head() # no visible effect: a bare expression inside a loop is not displayed

    # Measuring potential accessibility to jobs, using the cumulative impedance computed above
    # (same threshold as impedence_param)
    # acc_pot_jobs = acc_jobs.potential(
    #         opportunity = "pop",
    #         impedence = cost_output,
    #         output_col_name= "pot_acc_" + cost_output
    #         )
    # the function above overwrites the output column at every loop,
    # so we reproduce the same calculation (from tracc documentation) per each mode:
    # opportunities weighted by the impedance value, then summed per origin
    acc_jobs.data[acc_column_name] = acc_jobs.data[opportunity] * acc_jobs.data[cost_output]
    group_sum_bymode_acc = acc_jobs.data.groupby(acc_jobs.data[travel_costs_ids[0]])[[acc_column_name]].sum()
    # 'from_id' on the right-hand side is the grouping key of the summed table
    acc_pot_jobs = acc_pot_jobs.merge(group_sum_bymode_acc,
                    how ='outer',
                    left_on = 'id',
                    right_on = 'from_id')

In [None]:
# Preview the joined travel-cost / supply table left over from the LAST mode of the loop
acc_jobs.data.head()

In [None]:
# Preview the per-origin jobs accessibility table (one column per mode)
acc_pot_jobs.head()

In [None]:
# TODO: save the jobs accessibility output to an external file (not implemented yet)

### Accessibility to greenspace

In [None]:
# Prepare the greenspace layers.
# Both layers have a column called 'id', which generates issues once they are
# combined, so each one gets a distinct name:
#   gs_entrances columns: ['id', 'accessType', 'refToGreenspaceSite', 'geometry']
#   gs_sites columns:     ['id', 'function', 'geometry']
gs_entrances = gs_entrances.rename(columns={'id': 'id_entrance'})
gs_sites = gs_sites.rename(columns={'id': 'id_site'})

# area of each site, stored in 'area_m2'
# (presumably square metres, given that the sites layer was left in its projected CRS — confirm)
gs_sites['area_m2'] = gs_sites['geometry'].area


In [None]:
# Map of the greenspace sites, coloured by area in 10 natural-breaks classes.
# Only the last expression of a cell is displayed, so the two .head() calls
# below produce no visible output here.
gs_entrances.head()
gs_sites.head()
gs_sites.explore(column='area_m2',
                       cmap="plasma",
                       scheme='NaturalBreaks',
                       k=10)

In [None]:
# Preview the entrances after the 'id' -> 'id_entrance' rename
gs_entrances.head()

In [None]:
# Preview the sites after the 'id' -> 'id_site' rename and with the new 'area_m2' column
gs_sites.head()

In [None]:
# Attach each site's function and area to its entrances.
# Right join: every site is kept, including any site with no matching entrance.
gs_entrances_with_parkarea = gs_entrances[['id_entrance', 'refToGreenspaceSite']].merge(
    gs_sites[['id_site', 'function', 'area_m2']],
    how='right',
    left_on='refToGreenspaceSite',
    right_on='id_site',
)


In [None]:
# Preview the entrance table with the site function and area attached
gs_entrances_with_parkarea.head()

In [None]:
# Preview the travel time matrix (one 'time_<mode>' column per transport mode)
ttm_complete.head()

In [None]:
# Greenspace accessibility: per origin and per transport mode, the summed area of the
# greenspace sites whose quickest entrance is within the travel time threshold.
# Result: gs_acc, indexed by 'from_id', with one 'area_<threshold>_<mode>' column per mode.
ttm_greenspace = ttm_complete.copy() # saving a copy of the matrix (the following operations will add columns to it, but we want to keep the original one also)


# attach the site reference and site area to every O-D pair whose destination is a greenspace entrance
# (left join: pairs with other kinds of destination are kept, without an area)
ttm_gs_with_area =  pd.merge(ttm_greenspace,
                            gs_entrances_with_parkarea[['id_entrance','refToGreenspaceSite', 'area_m2']],
                            left_on='to_id',
                            right_on='id_entrance',
                            how='left'
                            )
# generate tracc cost object
ttm_gs_tracc = tracc.costs(ttm_gs_with_area)

modes_list = ['transit',
              'car',
              'bicycle',
              'walk']

# containers to be filled up in the next for loop
acc_pot_gs = origins[['id']] # NOTE(review): not used again in the visible cells
gs_acc = [] # one Series per mode, concatenated after the loop

for m in modes_list:
    # generate variable names to be used in the tracc function below
    cost_name = 'time_' + m # travel time column produced by the ttm loop for this mode
    travel_costs_ids = ["from_id","to_id"]
    impedence_param = 15 # value for impedence function, to be changed as needed
    impedence_param_string = str(impedence_param)
    # name of the column
    cost_output = 'cum_' + impedence_param_string + '_' + m # naming depends on impedence function threshold
    area_column_name = 'area_' + impedence_param_string + '_' + m
    acc_column_name = 'pot_cum_acc_' + impedence_param_string + '_' + m # naming depends on impedence function threshold
    opportunity = "pop" # NOTE(review): not used in this loop; the opportunity here is 'area_m2'
    # Computing the cumulative impedence function for the travel time threshold
    # given by impedence_param (15 minutes as currently set).
    ttm_gs_tracc.impedence_calc(
        cost_column = cost_name,
        impedence_func = "cumulative",
        impedence_func_params = impedence_param, # to calculate opportunities in X min threshold
        output_col_name = cost_output,
        prune_output = False
    )
    ttm_gs_df = ttm_gs_tracc.data
    print(ttm_gs_df.columns)
    # Setting up the accessibility measure. This is done differently from the jobs case,
    # as the opportunity for greenspace is the cumulative sum of the sites' areas.
    # A. Keep the site area only where the travel time is within the threshold:
    #    the area is multiplied by the impedence column instead of filtering rows
    print("cost output is", cost_output)
    print('area column name is', area_column_name)
    # tracc_15min = ttm_gs_tracc.data[ttm_gs_tracc.data.loc[:,cost_output]==1] # this doesn't work because of the different lengths of the columns generated per mode
    ttm_gs_tracc.data[area_column_name] =  ttm_gs_tracc.data['area_m2'] * ttm_gs_tracc.data[cost_output]
    ttm_gs_df = ttm_gs_tracc.data

    # B. Filter entrances (only one per park): after sorting by travel time, the first
    #    row of each (origin, site) pair is the one that is kept
    oneaccess_perpark = ttm_gs_df.sort_values(cost_name).drop_duplicates(["from_id", "refToGreenspaceSite"])
    oneaccess_perpark.head() # no visible effect: a bare expression inside a loop is not displayed
    # C. Assign metric as sum[parks' area]
    # generate a Series with one entry per OA centroid ('from_id') holding the sum of sites' areas - per each mode
    gs_metric_per_mode = oneaccess_perpark.groupby(['from_id'])[area_column_name].sum() #.reset_index()
    gs_acc.append(gs_metric_per_mode)
# side-by-side concatenation: gs_acc goes from a list of Series to one table, one column per mode
gs_acc = pd.concat(gs_acc,
                   axis=1)


## 3. Plotting results

In [None]:
# Output-area polygons, used only to map the results; reprojected to epsg:4326
oas_boundaries = gpd.read_file(oas_file, layer="OA_TyneWear")
oas_boundaries_wgs84 = oas_boundaries.to_crs("epsg:4326")

In [None]:
# Attach the greenspace metric to the OA polygons.
# Right join: one row per origin present in gs_acc.
oas_boundaries_metric = oas_boundaries_wgs84.merge(
    gs_acc,
    how="right",
    left_on='geo_code',
    right_on="from_id",
)

In [None]:
# Map of the greenspace metric for the transit mode (column 'area_15_transit'),
# in 10 natural-breaks classes
oas_boundaries_metric.explore(column='area_15_transit',
                              cmap="plasma",
                              scheme='NaturalBreaks',
                              k=10)

In [None]:
# Map of the greenspace metric for the car mode (column 'area_15_car'),
# in 10 natural-breaks classes
oas_boundaries_metric.explore(column='area_15_car',
                              cmap="plasma",
                              scheme='NaturalBreaks',
                              k=10)