In [1]:
from graph_ds import PyH3Graph
from pois_to_h3 import get_pois_h3
from ghsl_processing import get_origins, city_boundaries_to_h3 

import pandas as pd
import h3.api.numpy_int as h3
import geopandas as gpd
import matplotlib.pyplot as plt
import contextily as cx
import numpy as np
import seaborn as sns
import pickle


def plot_hex_df(df, markersize, color_column=None, color_categorical=False):
    df[['y','x']] = list(df['h3_index'].apply(h3.h3_to_geo))
    # Convert the pandas DataFrame to a GeoPandas DataFrame with a Point geometry column
    gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y))

    fig, ax = plt.subplots(figsize=(10,10))
    # Show the GeoPandas DataFrame

    if color_column:
        gdf.plot(ax=ax, column=color_column, categorical=color_categorical,
                  legend=True, markersize=markersize, alpha=0.5, cmap='Reds',
                  vmin=0, vmax=25)
    else:
        gdf.plot(ax=ax, markersize=markersize, alpha=0.5)

    #plt.xlim(12.391681,12.737388)
    #plt.ylim(55.549206, 55.759991)

    cx.add_basemap(ax = ax, crs="EPSG:4326")
    #plt.show()

def routing(category_set, origins, destinations, num_origins, graph):
    mins={}
    category_travel_time_means = {}
    for c in category_set:
        cat_destinations = list(destinations[destinations['category']==c]['h3_index'])
        ds = graph.matrix_distance(origins=origins, destinations=cat_destinations, dynamic_infinity=True)

        # they might be different lengths, so we can't use a df
        mins[c]={k:np.nanmin(np.array(v,dtype=np.float64)) for k,v in ds.items() if v}
        category_travel_time_means[c] = np.nanmean(list(mins[c].values()))

    return category_travel_time_means, mins




In [2]:
osm_file = "../resources/denver-processed.osm.pbf"
gtfs_files = ["../resources/denver_gtfs.zip"]
ghsl_file = "../resources/GHS_BUILT_C_MSZ_E2018_GLOBE_R2022A_54009_10_V1_0_R5_C10.tif"

city_name = "Denver, Colorado"

In [3]:
# Parameters

# 12 is around 10 meter resolution
H3_RES = 12

# how many origins to sample
num_origins = 1000

#essential filter will be the least restrictive
essential_filter = {
    "amenity":["pharmacy","dentist","clinic","doctors","school"],
    "shop":["supermarket","greengrocer","medical_supply","grocery","wholesale"],
    "healthcare":["clinic","doctor","pharmacy","dentist"],
    "leisure":["park"], "sport":True
}

# categories considered for the n-minute calculation
category_set = ['pharmacy','park','supermarket','sport', 'school']

# how to map osm tags to categories
osm_tag_mapping = {
    "medical_supply":"pharmacy",
    "greengrocer":"supermarket",
    "wholesale":"supermarket",
    "grocery":"supermarket",
    "clinic":"doctor",
    "doctors":"doctor",
    'pitch':'sport',
    'track':'sport',
    'sports_centre':'sport'
}

In [4]:
results = {}

city_bounds_h3, bbox, bbox_pois = city_boundaries_to_h3([city_name])

origins = get_origins(H3_RES, [city_name], bbox, ghsl_file, city_bounds_h3)
print("origins", len(origins))

origins_sample = list(origins[origins['residential_bool']==1].sample(num_origins, replace=True)['h3_index'])


destinations = get_pois_h3(osm_file, essential_filter, H3_RES, category_set, osm_tag_mapping, [city_name])

for g_type in ['all', 'walk', 'walk+bike', 'walk+transit']:
        print(g_type)
        # build the graph
        graph = PyH3Graph(bike_penalty=1, k_ring=2, layers=g_type)
        graph.create(osm_path=osm_file,gtfs_paths=gtfs_files)
        # do routing
        category_means, route_mins = routing(category_set, origins_sample, destinations, num_origins, graph)
        # save results as dictionary
        results.update({g_type:{'category_means':category_means, 'route_mins':route_mins}})

file already exists for Denver, Colorado
origins 388682
file already exists for Denver, Colorado
all
processing osm pbf file: ../resources/denver-processed.osm.pbf
converted OSM file into 3362224 edges
osm graph created with 2804597 nodes in 17.860596 s
getting GTFS feed from ../resources/denver_gtfs.zip
routes: 123
gtfs graph created with 16937 nodes in 3.2694175 s
merged gtfs graph into osm graph, now has 2815983 nodes, took 16 ms
hash: 6615292897345112488


  mins[c]={k:np.nanmin(np.array(v,dtype=np.float64)) for k,v in ds.items() if v}


walk
processing osm pbf file: ../resources/denver-processed.osm.pbf
converted OSM file into 3057122 edges
osm graph created with 2510551 nodes in 16.107687 s
hash: 17802602972221290637
walk+bike
processing osm pbf file: ../resources/denver-processed.osm.pbf
converted OSM file into 3362224 edges
osm graph created with 2804597 nodes in 16.8824 s
hash: 6772064847077820635
walk+transit
processing osm pbf file: ../resources/denver-processed.osm.pbf
converted OSM file into 3057122 edges
osm graph created with 2510551 nodes in 14.8599205 s
getting GTFS feed from ../resources/denver_gtfs.zip
routes: 123
gtfs graph created with 16937 nodes in 3.4907107 s
merged gtfs graph into osm graph, now has 2521937 nodes, took 17 ms
hash: 5335406685770258643


In [5]:
import polars as pl
from lets_plot import *
LetsPlot.setup_html()

In [6]:
result_data = []
for g_type, result in results.items():
    route_mins = result['route_mins']
    for category, values in route_mins.items():
        for h3_index, value in values.items():
            lat, lon = h3.h3_to_geo(h3_index)
            result_data.append([g_type, category, h3_index, lat, lon, value])

result_df = pl.DataFrame(result_data, columns=['graph_type', 'category', 'h3', 'lat', 'lon', 'travel_time'])

result_df.head()

  result_df = pl.DataFrame(result_data, columns=['graph_type', 'category', 'h3', 'lat', 'lon', 'travel_time'])


graph_type,category,h3,lat,lon,travel_time
str,str,i64,f64,f64,f64
"""all""","""pharmacy""",631182129627307519,39.721577,-105.040655,9.716746
"""all""","""pharmacy""",631182123513599487,39.670079,-104.96764,7.958361
"""all""","""pharmacy""",631182127784901119,39.766745,-104.95188,5.940749
"""all""","""pharmacy""",631182123278384127,39.664002,-104.91056,3.586867
"""all""","""pharmacy""",631182130211851775,39.776238,-105.047545,5.60448


In [7]:
plot_df = result_df.filter((pl.col('travel_time').is_not_nan()) & (pl.col('travel_time') < 45.0))

In [8]:
ggplot(plot_df) + \
    geom_livemap() + \
    geom_point(aes(x='lon', y='lat'), size=0.5) + \
    ggtitle(f"Origins in {city_name}")

In [15]:
ggplot(plot_df.to_pandas()) + \
    geom_point(aes(x='lon', y='lat', color='travel_time'), alpha=0.5) + \
    scale_color_gradient(low='#f2f0f7', high='#54278f') + \
    facet_wrap(["graph_type", "category"], ncol=5) + \
    coord_fixed() + \
    ggtitle(f"Minimum travel times to essential services in [{city_name}]") + \
    ggsize(1000, 700)

In [16]:
ggplot(plot_df.to_pandas()) + \
    geom_density(aes(x='travel_time', fill='category'), alpha=0.3) + \
    xlim(0, 30) + \
    facet_wrap(["graph_type"], ncol=5) + \
    ggtitle(f"Minimum travel time to essential services in [{city_name}], grouped by network type")

In [17]:
benefit_df = plot_df.groupby(['h3', 'category']).agg(
    pl.col('travel_time'),
    pl.col('travel_time').len().alias('count'),
).filter(pl.col('count') == 4).groupby(['h3', 'category']).agg(
    (pl.col('travel_time').flatten().take(1) - pl.col('travel_time').flatten().take(2)).alias('bike_benefit'),
    (pl.col('travel_time').flatten().take(1) - pl.col('travel_time').flatten().take(3)).alias('transit_benefit'),
    (pl.col('travel_time').flatten().take(1) - pl.col('travel_time').flatten().take(0)).alias('multimodal_benefit'),
)

In [18]:
ggplot(benefit_df.to_pandas()) + \
    geom_histogram(aes(x='multimodal_benefit', fill='category'), alpha=0.5, bins=40) + \
    xlim(0, 15) + \
    ggtitle(f"Multimodal travel time benefit over walking in [{city_name}]")