This script originates from `accessb_r5py.ipynb` and is a first attempt at a comprehensive methodology for building accessibility indicators.

When the methodology is robust, this script will probably be split in two, one for green spaces and one for jobs accessibility.

1. Variables definition

In [20]:
import sys

# r5py picks up its options (such as the JVM memory limit) from sys.argv when
# it is imported, so the option must be added BEFORE the r5py import below.
# `extend` adds the flag and its value as two separate argument strings;
# `append` would insert the whole list as one nested element, which is not a
# valid command-line argument.
sys.argv.extend(["--max-memory", "8G"])

import datetime

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode


# Root folder holding every input dataset used by this script.
data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"

# Sub-folders shared by several of the paths below.
_authorities_dir = data_folder + "/processed/authorities"
_processed_accessibility_dir = data_folder + "/processed/accessibility"
_raw_accessibility_dir = data_folder + "/raw/accessibility"

# --- Regional level files (require previous editing) ---
# Output-area centroids: used as the population origins.
oas_centroids_file = _authorities_dir + "/OA_centroids_TyneWear.gpkg"
# LSOA centroids: used as the jobs locations (destinations).
lsoas_centroids_file = _authorities_dir + "/tynewear_lsoas_centroids_wgs84.shp"
# Local authority districts: needed to filter greenspace data within the
# regional boundaries.
region_lads_file = _authorities_dir + "/LADs_tynewear.shp"
# Non-spatial table: number of jobs per LSOA.
jobs_file = _processed_accessibility_dir + "/bres_jobs_tynewear.csv"

# --- National level files ---
gs_accesspoints_file = (
    _raw_accessibility_dir + "/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
)
osm_data_file = _raw_accessibility_dir + "/tyne-and-wear-latest.osm.pbf"
gtfs_data_file = _raw_accessibility_dir + "/itm_north_east_gtfs.zip"

2. data import

In [37]:
# Load the origin/destination point layers (Tyne and Wear region only) and the
# non-spatial land use table.

# --- Origins: output area (OA) centroids ---
origin_centroids = gpd.read_file(oas_centroids_file, layer="OA_centroids_TyneWear")
# The origin dataset must contain an 'id' column; reuse the OA code for it.
origin_centroids["id"] = origin_centroids["OA11CD"]
origin_centroids.head()
# origin_centroids.explore()

# --- Destinations for jobs: LSOA centroids ---
lsoas_centroids = gpd.read_file(lsoas_centroids_file)
lsoas_centroids.head()
# The destination dataset must contain an 'id' column; reuse the LSOA code.
lsoas_centroids["id"] = lsoas_centroids["LSOA21CD"]

# --- Destinations for greenspace: access points layer ---
accesspoints = gpd.read_file(gs_accesspoints_file, layer="AccessPoint")
accesspoints.head()

# --- Land use data: number of jobs per LSOA (Nomis BRES) ---
jobs_per_lsoa = pd.read_csv(jobs_file)
jobs_per_lsoa.head()



Unnamed: 0,LSOA21CD,geometry,id
0,E01008162,POINT (-1.58805 54.95842),E01008162
1,E01008588,POINT (-1.43999 55.04056),E01008588
2,E01008510,POINT (-1.49579 54.99911),E01008510
3,E01008707,POINT (-1.40202 54.90328),E01008707
4,E01008184,POINT (-1.75247 54.91805),E01008184


In [40]:
# Bring the origins into WGS84 (EPSG:4326) so that they share a CRS with the
# LSOA centroids; this is needed for the network build / routing to work.
# CRS values noted below are the ones observed when inspecting the layers.

origin_centroids.crs  # epsg:27700 (British National Grid)
lsoas_centroids.crs  # epsg:4326 (already WGS84)
origin_centroids_wgs84 = origin_centroids.to_crs(epsg=4326)


In [33]:
# Merge the geospatial data (LSOA centroids) with the land use data (jobs; the
# same pattern could be used for greenspace points, population, etc.).
#
# `lsoas_centroids` is used directly: no separate `lsoas_centroids_wgs84`
# object is created anywhere in this script (referencing it raised NameError),
# and the same frame is passed as the destinations of the travel time matrix.
# Calling `.merge` on the GeoDataFrame keeps the result a GeoDataFrame.
#
# NOTE(review): the join matches `LSOA21CD` (centroids) against `LSOA11CD`
# (jobs table) -- confirm the two code vintages are compatible for this region.
landuse_gdf = lsoas_centroids.merge(
    jobs_per_lsoa,  # info we want to add
    how="inner",  # keep only the LSOAs that also appear in the regional jobs file
    left_on="LSOA21CD",
    right_on="LSOA11CD",
)

# Replace missing job counts with 0 and store them as integers (originally done
# hoping it makes `explore` work further down).
# NOTE: deal with this in a better way later on, i.e. clean the data before
# importing them in Python.
landuse_gdf["jobs"] = landuse_gdf["jobs"].fillna(0).astype(np.int64)
landuse_gdf.dtypes

LSOA21CD      object
geometry    geometry
id            object
LSOA11CD      object
jobs           int64
dtype: object

## Load transport network

>  To import the street and public transport networks, instantiate an `r5py.TransportNetwork` with the file paths to the OSM extract and the GTFS files:

In [34]:
# Build the routable network from the OSM extract plus the list of GTFS feeds
# (a single feed here).
transport_network = TransportNetwork(osm_data_file, [gtfs_data_file])

## Compute travel matrix
1. Generate the travel time matrix from the OA centroids to all LSOA centroids by transit

In [42]:
# Travel times from every OA centroid (origins) to every LSOA centroid
# (destinations) by public transport combined with walking, departing on
# 19 January 2023 at 08:30. The computer object is only needed to produce the
# matrix, so it is built and used in a single expression.
ttm_jobs_transit = TravelTimeMatrixComputer(
    transport_network,
    origins=origin_centroids_wgs84,
    destinations=lsoas_centroids,
    departure=datetime.datetime(year=2023, month=1, day=19, hour=8, minute=30),
    transport_modes=[TransitMode.TRANSIT, LegMode.WALK],
).compute_travel_times()
ttm_jobs_transit.head()

Unnamed: 0,from_id,to_id,travel_time
0,E00041377,E01008162,14.0
1,E00041377,E01008588,50.0
2,E00041377,E01008510,46.0
3,E00041377,E01008707,51.0
4,E00041377,E01008184,57.0


In [46]:
# Save the transit travel time matrix (OA -> LSOA) to disk for later reuse.
ttm_jobs_transit.to_csv(
    "/Users/azanchetta/OneDrive - The Alan Turing Institute/Research/projects/"
    "LandUseDemonstrator/output/ttm_transit_oaTOlsoa_jobs.csv"
)

In [62]:
# Wrap the travel time matrix in a tracc `costs` object; its `.data` table is
# what the impedance and accessibility steps further down operate on.
df_tracc = tracc.costs(ttm_jobs_transit)
df_tracc.data.head()

Unnamed: 0,from_id,to_id,travel_time,fCij_c45,jobs_cum_15
0,E00041377,E01008162,14.0,1,1
1,E00041377,E01008588,50.0,0,0
2,E00041377,E01008510,46.0,0,0
3,E00041377,E01008707,51.0,0,0
4,E00041377,E01008184,57.0,0,0


In [63]:
# Median travel time from each origin to all destinations.
median_times = (
    ttm_jobs_transit
    .groupby("from_id")["travel_time"]
    .median()
)
median_times

from_id
E00041363    57.0
E00041364    58.0
E00041366    54.0
E00041367    56.0
E00041368    59.0
             ... 
E00175601    66.0
E00175602    75.0
E00175603    67.0
E00175604    64.0
E00175605    53.0
Name: travel_time, Length: 3795, dtype: float64

In [52]:
# Longest travel time per origin, then the overall maximum across all origins.
max_time = (
    ttm_jobs_transit
    .groupby("from_id")["travel_time"]
    .max()
)
max_time.max()

119.0

In [64]:
# Compute a cumulative impedance function on the travel times using a
# 15 minute threshold; the result is stored in the "jobs_cum_15" column.
df_tracc.impedence_calc(
    cost_column="travel_time",
    impedence_func="cumulative",
    impedence_func_params=15,  # threshold: count the jobs reachable in 15 min
    output_col_name="jobs_cum_15",
    prune_output=False,
)
df_tracc.data.head()

Unnamed: 0,from_id,to_id,travel_time,fCij_c45,jobs_cum_15
0,E00041377,E01008162,14.0,1,1
1,E00041377,E01008588,50.0,0,0
2,E00041377,E01008510,46.0,0,0
3,E00041377,E01008707,51.0,0,0
4,E00041377,E01008184,57.0,0,0


In [65]:
# Build the accessibility object: this joins the destination (supply) data,
# i.e. the jobs per LSOA, onto the travel cost table.
acc_transit = tracc.accessibility(
    travelcosts_df=df_tracc.data,
    supply_df=jobs_per_lsoa,
    travelcosts_ids=["from_id", "to_id"],
    supply_ids="LSOA11CD",
)
acc_transit.data.head()

Unnamed: 0,from_id,to_id,travel_time,fCij_c45,jobs_cum_15,LSOA11CD,jobs
0,E00041377,E01008162,14.0,1,1,E01008162,125
1,E00041377,E01008588,50.0,0,0,E01008588,450
2,E00041377,E01008510,46.0,0,0,E01008510,175
3,E00041377,E01008707,51.0,0,0,E01008707,100
4,E00041377,E01008184,57.0,0,0,E01008184,300


In [67]:
# Potential accessibility to jobs per origin, weighting the "jobs" column by
# the 15 minute cumulative impedance stored in "jobs_cum_15".
dfa_transit = acc_transit.potential(opportunity="jobs", impedence="jobs_cum_15")
dfa_transit.head()

Unnamed: 0,from_id,A_jobs_jobs_cum_15
0,E00041363,1065
1,E00041364,1425
2,E00041366,900
3,E00041367,1115
4,E00041368,290
