This script originates from `accessb_r5py.ipynb` and is a first attempt at developing a methodology for building accessibility indicators in a comprehensive way.

Once the methodology is robust, this script will probably be split in two: one for green space accessibility and one for job accessibility.

1. Variables definition

In [1]:
import sys

# r5py picks up its JVM options (such as --max-memory) from sys.argv when the
# package is first imported, so the flag has to be in place BEFORE the r5py
# import further down.  extend() adds the flag and its value as two separate
# arguments; append() would have inserted the whole list as a single element.
sys.argv.extend(["--max-memory", "8G"])

import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
import tracc
from r5py import TransportNetwork, TravelTimeMatrixComputer, TransitMode, LegMode
from datetime import timedelta
import matplotlib.pyplot as plt


# Root of the project data; every input path below is derived from it.
data_folder = "/Users/azanchetta/OneDrive - The Alan Turing Institute/demoland_data"
processed_folder = data_folder + "/processed"
raw_accessibility_folder = data_folder + "/raw/accessibility"


# Regional-level files (these require previous editing).
# Spatial layers:
# OA centroids, used as the population origins.
oas_centroids_file = processed_folder + "/OA_centroids_TyneWear.gpkg"
# OA polygons, needed for visualisation purposes.
oas_file = processed_folder + "/authorities/OA_TyneWear.gpkg"
# lsoas_centroids_file = f"{data_folder}/processed/authorities/tynewear_lsoas_centroids_wgs84.shp" # used for jobs location (destination)
# LAD polygons, needed to filter the greenspace data to the regional boundaries.
region_lads_file = processed_folder + "/authorities/LADs_tynewear.shp"
# Non-spatial layers: (none yet)


# National-level files.
greenspace_file = (
    raw_accessibility_folder
    + "/OS Open Greenspace (GPKG) GB/data/opgrsp_gb.gpkg"
)
osm_data_file = raw_accessibility_folder + "/tyne-and-wear-latest.osm.pbf"
gtfs_data_file = raw_accessibility_folder + "/itm_north_east_gtfs.zip"

2. data import

In [2]:
# Read the origin/destination point layers (Tyne and Wear region only).

# Origins: Output Area (OA) centroids.
origin_centroids = gpd.read_file(
    oas_centroids_file,
    layer="OA_centroids_TyneWear",
)
# The origin dataset must contain an 'id' column; the OA code is reused for it.
origin_centroids["id"] = origin_centroids["OA11CD"]
origin_centroids.head()
# origin_centroids.explore()

# Destinations: WPZ centroids for jobs location (currently disabled) and
# greenspace access points for greenspace.
# wpz_centroids = gpd.read_file(wpz_centroids_file,
#                               layer = "WPZ_centroids_tynewear")
# wpz_centroids.head()
# wpz_centroids['id'] = wpz_centroids['wz11cd'] # Destination dataset must contain an 'id' column

accesspoints = gpd.read_file(greenspace_file, layer="AccessPoint")
accesspoints.head()

# Polygon layers: OA boundaries for mapping, LAD boundaries for the regional
# filter applied to the access points.
oas_boundaries = gpd.read_file(oas_file, layer="OA_TyneWear")
region_lads = gpd.read_file(region_lads_file)
region_lads.head()

Unnamed: 0,OBJECTID,LAD20CD,LAD20NM,LAD20NMW,BNG_E,BNG_N,LONG,LAT,Shape__Are,Shape__Len,label,geometry
0,265,E08000021,Newcastle upon Tyne,,422287,569662,-1.65297,55.02101,113461900.0,65202.925674,Newcastle upon Tyne\nE08000021,"POLYGON ((422592.399 576160.095, 422618.297 57..."
1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022,"MULTIPOLYGON (((435203.599 575441.701, 435209...."
2,267,E08000023,South Tyneside,,435514,564057,-1.44679,54.96988,64428420.0,51370.230506,South Tyneside\nE08000023,"POLYGON ((438030.200 568413.300, 438021.350 56..."
3,268,E08000024,Sunderland,,436470,551524,-1.43344,54.85719,137441200.0,99737.411804,Sunderland\nE08000024,"MULTIPOLYGON (((441259.800 557854.000, 441252...."
4,281,E08000037,Gateshead,,420168,559658,-1.6868,54.9312,142369100.0,90476.826397,Gateshead\nE08000037,"POLYGON ((415042.801 565083.296, 415104.202 56..."


In [3]:
# Keep only the greenspace access points that fall within the LAD boundaries
# of the region.  `predicate` is the current name of the spatial-relationship
# keyword of gpd.sjoin; the former `op` spelling is deprecated and triggers a
# warning.
region_accesspoints = gpd.sjoin(
    accesspoints,
    region_lads,
    predicate="within",
)
region_accesspoints.head()

  if await self.run_code(code, result, async_=asy):


Unnamed: 0,id,accessType,refToGreenspaceSite,geometry,index_right,OBJECTID,LAD20CD,LAD20NM,LAD20NMW,BNG_E,BNG_N,LONG,LAT,Shape__Are,Shape__Len,label
37465,idD93E3AB6-BDCE-483D-B3CF-4242FA90A0B7,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (428393.150 571055.200),1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022
37466,id951F323D-8E88-4A5B-B9A4-37E0D69DD870,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (428104.740 571065.000),1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022
37467,id0E14522B-427F-47C1-B043-BC3847ABE673,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (428096.340 571073.320),1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022
37468,id548D0EAC-E6BE-4DFA-B90C-DB631A75309B,Pedestrian,idE56DE841-2BC6-13A9-E053-AAEFA00A0D0E,POINT (428234.820 571077.590),1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022
37469,id0FECA8F4-6053-4147-A11D-62B01EC6C135,Pedestrian,idE56DE6C0-48DC-13A9-E053-AAEFA00A0D0E,POINT (428229.590 571078.720),1,266,E08000022,North Tyneside,,431471,570602,-1.50923,55.02896,82313730.0,65337.781081,North Tyneside\nE08000022


In [None]:
# region_accesspoints.explore()

In [4]:
# Reproject the layers from British National Grid (EPSG:27700) to WGS84
# (EPSG:4326): the network build needs the points in the same CRS as the OSM
# and GTFS data.
# Source CRS as noted by the original author for origin_centroids,
# accesspoints and oas_boundaries: EPSG:27700.
origin_centroids_wgs84 = origin_centroids.to_crs("epsg:4326")
# Only the access points already filtered to the region are reprojected.
accesspoints_wgs84 = region_accesspoints.to_crs("epsg:4326")
oas_boundaries_wgs84 = oas_boundaries.to_crs("epsg:4326")


In [None]:
# # merging geospatial data (admin boundaries/centroids) with land use/opportunities data (jobs and greenspaces points, could be population etc)

# landuse_gdf = pd.merge(wpz_centroids_wgs84, # gdf file
#                        jobs_per_wpz, # info we want to add
#                        how = "inner", # this allows to pick (from the nationals centroids table) only LSOAs that belong to the region (jobs file)
#                        left_on = "wz11cd",
#                        right_on = "wpz11cd")
# landuse_gdf.head()


# landuse_gdf.jobs = landuse_gdf.jobs.fillna(0).astype(np.int64) # removing NaN and changing type to Int
# # hoping that this makes "explore" work below. NOTE: deal with this issue later on in a better way (IE clean data before importing them in python)
# landuse_gdf.dtypes

## Load transport network

>  To import the street and public transport networks, instantiate an `r5py.TransportNetwork` with the file paths to the OSM extract and the GTFS files:

In [None]:
# Build the street + public transport network from the OSM extract and the
# GTFS feed(s).  The feeds are passed as a list, so more can be added here.
gtfs_feeds = [gtfs_data_file]
transport_network = TransportNetwork(osm_data_file, gtfs_feeds)

## Compute travel matrix
Generate the travel time matrix from OAs to all greenspace access points, by walking.
Setting max time to 15min (though it arrives to calculate up to 15min)

In [None]:
# Walking travel times from every OA centroid to every regional greenspace
# access point.
walking_computer = TravelTimeMatrixComputer(
    transport_network,
    origins=origin_centroids_wgs84,
    destinations=accesspoints_wgs84,
    max_time=dt.timedelta(minutes=15),  # restricting travel to 15min
    speed_walking=4.8,
    transport_modes=[LegMode.WALK],
)
# The result table is what the rest of the script refers to by this name.
ttm_walking_OAtoGS = walking_computer.compute_travel_times()
ttm_walking_OAtoGS.head()

In [None]:
# Median of the travel_time column for each origin (from_id).
times_by_origin = ttm_walking_OAtoGS.groupby("from_id")["travel_time"]
median_times = times_by_origin.median()
median_times

In [None]:
# ttm_cycling_OAtoGS = TravelTimeMatrixComputer(
#     transport_network,
#     origins=origin_centroids_wgs84,
#     destinations=accesspoints_wgs84,
#     max_time=dt.timedelta(seconds = 900),
#     speed_walking=4.8,
#     transport_modes=[LegMode.WALK]
# )
# ttm_cycling_OAtoGS = ttm_cycling_OAtoGS.compute_travel_times()
# ttm_walking_OAtoGS.head()

In [None]:
# ttm_walking_OAtoGS.to_csv("/Users/azanchetta/OneDrive - The Alan Turing Institute/Research/projects/LandUseDemonstrator/output/ttm_gs_OAtoGS_15min.csv")

## Accessibility calculation
Using the `tracc` package, plus some original ideas of my own

In [None]:
# Wrap the walking travel time matrix in a tracc costs object; the underlying
# table stays reachable through its `.data` attribute (previewed below).
df_tracc = tracc.costs(ttm_walking_OAtoGS)
df_tracc.data.head()

In [None]:
# Largest travel_time recorded for each origin, followed by the overall
# maximum across all origins (shown as the cell output).
travel_time_by_origin = ttm_walking_OAtoGS.groupby("from_id")["travel_time"]
max_time = travel_time_by_origin.max()
max_time.max()

In [None]:
# Compute a cumulative impedance function on the travel_time column with a
# threshold parameter of 15 (minutes); the result is written to a new column
# named "cum_15".
# NOTE: "impedence" is the spelling used by the tracc API itself and must be
# kept as is in the method and keyword names.
df_tracc.impedence_calc(
    cost_column = "travel_time",
    impedence_func = "cumulative",
    impedence_func_params = 15, # minutes cap
    output_col_name = "cum_15",
    prune_output = False
)
df_tracc.data.head()

In [None]:
# Summary statistics of the tracc table (now including the cum_15 column).
df_tracc.data.describe()

In [None]:
# Short alias for the tracc table, to inspect the dataframe more easily.
# This is a reference to the same object, not a copy.
df_acc_df = df_tracc.data # to visualise the dataframe more easily

In [None]:
# Spot check of the results for one OA (compared against QGIS on the side):
# rows that start at OA E00041377 and have a travel_time of at most 5.
# df[df.iloc[:, 1] >= 60.0]
is_sample_oa = df_acc_df["from_id"] == "E00041377"
is_within_5 = df_acc_df["travel_time"] <= 5
trydf = df_acc_df[is_sample_oa & is_within_5]


In [None]:
# Keep only the origin/destination pairs flagged by the 15 min cumulative
# function (cum_15 == 1).  The travel times in the matrix reach 29 at most,
# despite the 15 min cap requested - reason not yet understood.
# The number of reachable greenspace access points per OA is then counted
# on top of this table.
reachable_mask = df_tracc.data["cum_15"] == 1
df_tracc_15min = df_tracc.data[reachable_mask]
df_tracc_15min.describe()

In [None]:
# Number of rows with cum_15 set, per origin OA; the result is indexed by
# from_id and has a single cum_15 column holding the count.
count_gs = df_tracc_15min[["from_id", "cum_15"]].groupby("from_id").count()

In [None]:
# Attach the access point counts to the OA polygons, ready for plotting.
# NOTE(review): the right join keeps only the OAs present in count_gs, i.e.
# those with at least one row where cum_15 == 1; OAs without any such row are
# absent from the map rather than shown with a count of 0 - confirm that this
# is intended.
oas_boundaries_accgs = oas_boundaries_wgs84.merge(
    count_gs,
    how="right",
    left_on="geo_code",
    right_on="from_id",
)
oas_boundaries_accgs.head()
# oas_boundaries_accgs.A_pop_cum_15.isnull().sum().sum() # (checking number of na in column) -> zero


In [None]:
# Interactive choropleth of the per-OA counts, classified into 8 natural
# breaks classes.
explore_options = {
    "column": "cum_15",
    "cmap": "plasma",
    "scheme": "NaturalBreaks",
    "k": 8,
}
oas_boundaries_accgs.explore(**explore_options)

In [None]:
# Preview the first rows of the merged boundaries + counts table.
oas_boundaries_accgs.head()

In [None]:
# Summary statistics of the merged boundaries + counts table.
oas_boundaries_accgs.describe()

In [None]:
# Export the per-OA counts to CSV.  The path is relative to the working
# directory, not to data_folder.
count_gs.to_csv("../output/acc_gs15_OAtoGS_tynewear.csv")