This script originates from `accessb_r5py.ipynb` and is a first attempt at a comprehensive methodology for building accessibility indicators.

Once the methodology is robust, this script will probably be split into two: one for green-space accessibility and one for job accessibility.

1. Variables definition

In [17]:
import sys

# r5py takes its JVM options (such as the memory limit) from the command
# line. Two things matter here:
#   * each token must be its own string in `sys.argv`, hence `extend` rather
#     than `append` (which would add a single nested-list element);
#   * the option has to be in place before r5py is imported -- r5py
#     presumably reads it when it starts its JVM on import (see the r5py
#     configuration docs), so this line must precede the import below.
sys.argv.extend(["--max-memory", "8G"])

import datetime

import numpy as np
import pandas as pd
import geopandas as gpd
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode
import matplotlib.pyplot as plt


# Root of the project data tree; every input path below hangs off it.
data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"

# Sub-folders that recur in the paths below.
processed_folder = f"{data_folder}/processed"
authorities_folder = f"{processed_folder}/authorities"
accessibility_folder = f"{processed_folder}/accessibility"
raw_accessibility_folder = f"{data_folder}/raw/accessibility"


# --- Regional-level files (these require previous editing) ---
# Spatial layers:
# OA centroids, used as the population origins
oas_centroids_file = f"{processed_folder}/OA_centroids_TyneWear.gpkg"
# OA polygons, needed for visualisation purposes
oas_file = f"{authorities_folder}/OA_TyneWear.gpkg"
# LSOA centroids, originally the jobs location (destination); not read by the
# code shown in this script, which uses the WPZ centroids instead
lsoas_centroids_file = f"{authorities_folder}/tynewear_lsoas_centroids_wgs84.shp"
# LAD boundaries, needed to filter greenspace data within the regional boundaries
region_lads_file = f"{authorities_folder}/LADs_tynewear.shp"
# Workplace-zone (WPZ) centroids
wpz_centroids_file = f"{authorities_folder}/WPZ_centroids_tynewear.gpkg"

# Non-spatial tables:
# jobs_file = f"{data_folder}/processed/accessibility/bres_jobs_tynewear.csv" # n of jobs per LSOA
jobs_file = f"{accessibility_folder}/wpz_tynewear_occupation_edited.csv"

# --- National-level files ---
# gs_accesspoints_file = f"{data_folder}/raw/accessibility/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
osm_data_file = f"{raw_accessibility_folder}/tyne-and-wear-latest.osm.pbf"
gtfs_data_file = f"{raw_accessibility_folder}/itm_north_east_gtfs.zip"

2. data import

In [31]:
# Load the origin/destination point layers (Tyne and Wear region only).

# Origins: output-area (OA) centroids.
origin_centroids = gpd.read_file(oas_centroids_file, layer="OA_centroids_TyneWear")
# The origin dataset must contain an 'id' column; reuse the OA code for it.
origin_centroids["id"] = origin_centroids["OA11CD"]
origin_centroids.head()
# origin_centroids.explore()

# Destinations: workplace-zone (WPZ) centroids for the jobs location
# (greenspace access points, for greenspace, are currently disabled below).
wpz_centroids = gpd.read_file(wpz_centroids_file, layer="WPZ_centroids_tynewear")
wpz_centroids.head()
# The destination dataset must contain an 'id' column; reuse the WPZ code.
wpz_centroids["id"] = wpz_centroids["wz11cd"]

# accesspoints = gpd.read_file(gs_accesspoints_file,
#                         layer = "AccessPoint")
# accesspoints.head()


# Land-use table: population per workplace zone, used as a proxy for the
# number of jobs.
jobs_per_wpz = pd.read_csv(jobs_file)
jobs_per_wpz.head()

# OA polygons, only needed for mapping the results.
oas_boundaries = gpd.read_file(oas_file, layer="OA_TyneWear")


Unnamed: 0,OBJECTID,OA11CD,GlobalID,geometry,id
0,126926,E00041377,c03c9813-26f3-41f9-85e5-d4cdf3742ca0,POINT (425583.000 562952.000),E00041377
1,126927,E00041435,16e6607e-0b59-4f6f-8ec6-06a7396a70a5,POINT (427216.699 555732.531),E00041435
2,126928,E00041745,4b5fa995-b251-4ee7-9a97-aef0a2598fe3,POINT (427897.004 559557.605),E00041745
3,126929,E00041432,6e660884-3917-4e46-a693-bad0821318cb,POINT (427856.367 555759.595),E00041432
4,126930,E00041742,0bfb7f06-a910-4fa2-8db1-e79d319ba232,POINT (427932.556 559770.754),E00041742


In [36]:
# The three layers are in British National Grid (EPSG:27700, per the checks
# below). Make WGS84 (EPSG:4326) copies of them; these copies are the ones
# used for the travel-time computation and the mapping (the original note
# says this is needed for the network build to work).
origin_centroids.crs  # epsg:27700
wpz_centroids.crs  # epsg:27700
oas_boundaries.crs  # epsg:27700

wgs84 = "epsg:4326"
origin_centroids_wgs84 = origin_centroids.to_crs(wgs84)
wpz_centroids_wgs84 = wpz_centroids.to_crs(wgs84)
oas_boundaries_wgs84 = oas_boundaries.to_crs(wgs84)


In [37]:
# merging geospatial data (admin boundaries/centroids) with landuse data (jobs and greenspaces points, could be population etc)

landuse_gdf = pd.merge(wpz_centroids_wgs84, # gdf file
                       jobs_per_wpz, # info we want to add
                       how = "inner", # this allows to pick (from the nationals centroids table) only LSOAs that belong to the region (jobs file)
                       left_on = "wz11cd",
                       right_on = "wpz11cd")
landuse_gdf.head()


# landuse_gdf.jobs = landuse_gdf.jobs.fillna(0).astype(np.int64) # removing NaN and changing type to Int
# # hoping that this makes "explore" work below. NOTE: deal with this issue later on in a better way (IE clean data before importing them in python)
# landuse_gdf.dtypes

Unnamed: 0,OBJECTID,wz11cd,GlobalID,geometry,id,wpz11cd,pop
0,2,E33000251,{AF2BD35C-B624-4E2D-9C78-F26DF4FCABCE},POINT (-1.41992 54.91839),E33000251,E33000251,656
1,3,E33000799,{8CB93749-3349-462C-93C7-B6E321CC765C},POINT (-1.61606 54.97382),E33000799,E33000799,1118
2,4,E33000257,{03204BF6-50A6-4AD1-855F-C7BBE6D8137B},POINT (-1.53272 54.90010),E33000257,E33000257,2842
3,5,E33000079,{53333BDF-9792-4370-94AB-BE7853FA2ACA},POINT (-1.62268 55.01104),E33000079,E33000079,214
4,8,E33000174,{35114C58-FAA7-4E83-9724-ACED166052D5},POINT (-1.50942 55.02269),E33000174,E33000174,869


## Load transport network

>  To import the street and public transport networks, instantiate an `r5py.TransportNetwork` with the file paths to the OSM extract and the GTFS files:

In [28]:
# Build the routable network from the OSM extract plus the GTFS feed
# (the second argument is a list of GTFS files; only one is used here).
gtfs_files = [gtfs_data_file]
transport_network = TransportNetwork(osm_data_file, gtfs_files)

## Compute travel matrix
1. Generate the travel-time matrix from the OA centroids to all WPZ centroids by transit

In [41]:
# Configure the travel-time computation: OA centroids -> WPZ centroids,
# by public transport with walking legs, departing 19 Jan 2023 at 08:30.
ttm_computer = TravelTimeMatrixComputer(
    transport_network,
    origins=origin_centroids_wgs84,
    destinations=wpz_centroids_wgs84,
    departure=datetime.datetime(2023, 1, 19, 8, 30),
    transport_modes=[TransitMode.TRANSIT, LegMode.WALK],
)

# Run it; the result is a table with from_id / to_id / travel_time columns.
ttm_jobs_transit_OAtoWZ = ttm_computer.compute_travel_times()
ttm_jobs_transit_OAtoWZ.head()

Unnamed: 0,from_id,to_id,travel_time
0,E00041377,E33000251,78.0
1,E00041377,E33000799,18.0
2,E00041377,E33000257,43.0
3,E00041377,E33000079,33.0
4,E00041377,E33000174,55.0


In [42]:
# Persist the OA -> WPZ transit travel-time matrix so it need not be recomputed.
ttm_output_csv = "/Users/azanchetta/OneDrive - The Alan Turing Institute/Research/projects/LandUseDemonstrator/output/ttm_transit_oaTOwpz_jobs.csv"
ttm_jobs_transit_OAtoWZ.to_csv(ttm_output_csv)

In [43]:
# Wrap the OA -> WPZ transit travel-time matrix in a tracc `costs` object.
# The matrix computed in this script is `ttm_jobs_transit_OAtoWZ`; the
# shorter name `ttm_jobs_transit` is never defined here and raises a
# NameError on a clean run.
df_tracc = tracc.costs(ttm_jobs_transit_OAtoWZ)
df_tracc.data.head()

Unnamed: 0,from_id,to_id,travel_time
0,E00041377,E33000251,78.0
1,E00041377,E33000799,18.0
2,E00041377,E33000257,43.0
3,E00041377,E33000079,33.0
4,E00041377,E33000174,55.0


In [44]:
# Median transit travel time from each origin OA across all destinations.
# Uses `ttm_jobs_transit_OAtoWZ`, the travel-time matrix computed in this
# script (`ttm_jobs_transit` is not defined anywhere in it).
median_times = ttm_jobs_transit_OAtoWZ.groupby("from_id")["travel_time"].median()
median_times

from_id
E00041363    52.0
E00041364    53.0
E00041366    50.0
E00041367    52.0
E00041368    54.0
             ... 
E00175601    61.0
E00175602    72.0
E00175603    64.0
E00175604    72.0
E00175605    49.0
Name: travel_time, Length: 3795, dtype: float64

In [45]:
# Longest transit travel time per origin OA, then the overall maximum.
# Uses `ttm_jobs_transit_OAtoWZ`, the travel-time matrix computed in this
# script (`ttm_jobs_transit` is not defined anywhere in it).
max_time = ttm_jobs_transit_OAtoWZ.groupby("from_id")["travel_time"].max()
max_time.max()

122.0

In [46]:
# Compute a cumulative impedance column based on a 15-minute travel-time
# threshold (used further down to count the jobs reachable in 15 minutes).
# The result is stored in a new "cum_15" column; prune_output=False is
# passed so the table is not pruned.
threshold_minutes = 15
df_tracc.impedence_calc(
    cost_column="travel_time",
    impedence_func="cumulative",
    impedence_func_params=threshold_minutes,
    output_col_name="cum_15",
    prune_output=False,
)
df_tracc.data.head()

Unnamed: 0,from_id,to_id,travel_time,cum_15
0,E00041377,E33000251,78.0,0
1,E00041377,E33000799,18.0,0
2,E00041377,E33000257,43.0,0
3,E00041377,E33000079,33.0,0
4,E00041377,E33000174,55.0,0


In [48]:
# Create the tracc accessibility object. This joins the destination (supply)
# data -- jobs per WPZ, keyed by "wpz11cd" -- onto the travel-time table,
# whose origin/destination keys are "from_id" and "to_id".
acc_transit = tracc.accessibility(
    travelcosts_df=df_tracc.data,
    supply_df=jobs_per_wpz,
    travelcosts_ids=["from_id", "to_id"],
    supply_ids="wpz11cd",
)
acc_transit.data.head()

Unnamed: 0,from_id,to_id,travel_time,cum_15,wpz11cd,pop
0,E00041377,E33000251,78.0,0,E33000251,656
1,E00041377,E33000799,18.0,0,E33000799,1118
2,E00041377,E33000257,43.0,0,E33000257,2842
3,E00041377,E33000079,33.0,0,E33000079,214
4,E00041377,E33000174,55.0,0,E33000174,869


In [50]:
# Potential accessibility to jobs per origin OA, weighting the "pop"
# opportunity column by the 15-minute cumulative impedance ("cum_15").
dfa_transit = acc_transit.potential(opportunity="pop", impedence="cum_15")
dfa_transit.head()

Unnamed: 0,from_id,A_pop_cum_15
0,E00041363,4318
1,E00041364,7604
2,E00041366,845
3,E00041367,3413
4,E00041368,1728


In [51]:
# Peek at the OA boundary layer to see its columns.
oas_boundaries.head()

Unnamed: 0,geo_code,geometry
0,E00042786,"POLYGON ((428997.799 566018.331, 428998.491 56..."
1,E00042707,"POLYGON ((424221.655 568003.052, 424221.754 56..."
2,E00042703,"POLYGON ((419858.836 565454.433, 419858.374 56..."
3,E00042782,"POLYGON ((428932.199 566299.133, 428933.629 56..."
4,E00042789,"POLYGON ((428853.730 565689.295, 428860.602 56..."


In [None]:
# oas_boundaries_wgs84.explore()

In [52]:
# Sanity check: list the OA codes that have a centroid but no boundary polygon.
# (Row counts noted by the author: 3794 boundaries vs 3795 centroids.)
codes_from_boundaries = oas_boundaries["geo_code"].tolist()  # 3794
codes_from_boundaries
codes_from_centroids = origin_centroids["OA11CD"].tolist()  # 3795
centroid_only_codes = set(codes_from_centroids).difference(codes_from_boundaries)
list(centroid_only_codes)

[]

In [54]:
# plotting results
oas_boundaries_accjobs = oas_boundaries_wgs84.merge(dfa_transit,
                                                     left_on = 'geo_code',
                                                     right_on = "from_id",
                                                     how = "right")
oas_boundaries_accjobs.head()
oas_boundaries_accjobs.A_pop_cum_15.isnull().sum().sum() # (checking number of na in column) -> zero


0

In [55]:
# Interactive choropleth of the 15-minute job accessibility score,
# classified into 8 natural-breaks classes.
# NOTE(review): the recorded KeyError output mentions 'A_jobs_cum_15', a
# column this call does not request -- the output looks stale; re-run to confirm.
map_options = {
    "column": "A_pop_cum_15",
    "cmap": "plasma",
    "scheme": "NaturalBreaks",
    "k": 8,
}
oas_boundaries_accjobs.explore(**map_options)

KeyError: 'A_jobs_cum_15'

In [113]:
# Peek at the joined table (OA polygons plus accessibility score).
oas_boundaries_accjobs.head()

Unnamed: 0,geo_code,geometry,from_id,A_jobs_cum_15
0,E00041363,"POLYGON ((-1.59565 54.95480, -1.59557 54.95463...",E00041363,1065
1,E00041364,"POLYGON ((-1.60256 54.95461, -1.60256 54.95461...",E00041364,1425
2,E00041366,"POLYGON ((-1.58667 54.95529, -1.58667 54.95528...",E00041366,900
3,E00041367,"POLYGON ((-1.59069 54.95542, -1.59069 54.95542...",E00041367,1115
4,E00041368,"POLYGON ((-1.58402 54.95282, -1.58382 54.95290...",E00041368,290


In [114]:
# Summary statistics of the joined table's numeric column(s).
oas_boundaries_accjobs.describe()

Unnamed: 0,A_jobs_cum_15
count,3795.0
mean,2430.827404
std,4783.684558
min,0.0
25%,387.5
50%,975.0
75%,2350.0
max,66500.0


In [115]:
# Save the per-OA 15-minute job accessibility scores alongside the other
# processed accessibility data.
acc_results_csv = f"{data_folder}/processed/accessibility/results/acc_jobs15_OAs_tynewear.csv"
dfa_transit.to_csv(acc_results_csv)