In [1]:
import os
import numpy as np
import fiona
import pandas as pd
import geopandas as gpd
import argparse
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
import matplotlib.font_manager as fm
from textwrap import wrap
from matplotlib.backends.backend_pdf import PdfPages
import json
# Load the per-city configuration (used below for the 'gpkgNames' lookup).
with open('../process/configuration/cities.json') as f:
    city_data = json.load(f)

# NOTE(review): this executes gtfs_config.py directly in the notebook namespace, so every
# name that file defines becomes a global here (presumably including the `GTFS` mapping
# used further down -- confirm against that file). Only run this against a trusted
# checkout. The file is read inside a `with` block so the handle is closed
# deterministically instead of being left for the garbage collector.
with open('../process/data/GTFS/gtfs_config.py') as f:
    exec(f.read())

def valid_path(arg):
    """Resolve ``arg`` to an absolute path and confirm that it exists.

    Intended for use as an ``argparse`` ``type=`` callable.

    Parameters
    ----------
    arg : str
        A relative or absolute filesystem path.

    Returns
    -------
    str
        The absolute form of ``arg``.

    Raises
    ------
    argparse.ArgumentTypeError
        If nothing exists at the resolved path.
    """
    resolved = os.path.abspath(arg)
    if os.path.exists(resolved):
        return resolved
    raise argparse.ArgumentTypeError(f"The path {resolved} does not exist!")


In [2]:
# Parse input arguments
# parser = argparse.ArgumentParser(description='Analyse processed results with regard to thresholds')
# parser.add_argument('-gpkg_cities',
#                     help='path to all cities summary results geopackage',
#                     default='./data/output/November 2020/global_indicators_city_2020-11-24.gpkg',
#                     type=valid_path)
# parser.add_argument('-gpkg_hexes',
#                     help='path to all cities hexagon grid results geopackage',
#                     default='./data/output/November 2020/global_indicators_hex_250m_2020-11-24.gpkg',
#                     type=valid_path)
# args = parser.parse_args()

# Stand-in for the argparse namespace so the notebook can be run interactively
# without command line arguments.
class Object(object):
    pass


args = Object()
args.gpkg_cities = os.path.abspath('../process/data/output/global_indicators_city_2021-06-21.gpkg')
args.gpkg_hexes = os.path.abspath('../process/data/output/global_indicators_hex_250m_2021-06-21.gpkg')

# City-level summary results, one row per city, indexed by city name.
cities = gpd.read_file(args.gpkg_cities, layer='all_cities_combined').set_index('City')

DriverError: disk I/O error: this file is a WAL-enabled database. It cannot be opened because it is presumably read-only or in a read-only directory.

In [20]:
# One hexagon-grid layer per city; layer names are the lower-cased city name with
# spaces replaced by underscores.
hexes = {
    city: gpd.read_file(args.gpkg_hexes, layer=city.lower().replace(' ','_'))
    for city in cities.index
}

In [4]:
# Show the cities for which a hexagon layer was loaded.
hexes.keys()

dict_keys(['Maiduguri', 'Mexico City', 'Baltimore', 'Phoenix', 'Seattle', 'Sao Paulo', 'Hong Kong', 'Chennai', 'Bangkok', 'Hanoi', 'Adelaide', 'Melbourne', 'Sydney', 'Auckland', 'Graz', 'Ghent', 'Olomouc', 'Odense', 'Cologne', 'Lisbon', 'Barcelona', 'Valencia', 'Vic', 'Bern', 'Belfast'])

In [5]:
# Print a banner describing the two threshold scenarios (A and B) analysed below.
print("""
Global Indicators project, 2020

Analysis and mapping of threshold values for urban design and transport planning features associated with
  - (A) ≥80% probability of engaging in walking for transport and 
  - (B) reaching the WHO’s target of a ≥15% relative reduction in insufficient physical activity through walking

""")





Global Indicators project, 2020

Analysis and mapping of threshold values for urban design and transport planning features associated with
  - (A) ≥80% probability of engaging in walking for transport and 
  - (B) reaching the WHO’s target of a ≥15% relative reduction in insufficient physical activity through walking




In [6]:
# Calculate public transport stop density for hexagons, required for one scenario.
#
# For every city the public transport stops are located within hexagons, counted per
# hexagon, and stored on the hexagon layer as 'pt_stops' and 'pt_stops_per_sqkm'.
# The cell can be re-run safely: columns from a previous run are dropped first.
_yyyymmdd = str(20200827)
gtfs_analysis_date = f'{_yyyymmdd[0:4]}-{_yyyymmdd[4:6]}-{_yyyymmdd[6:]}'
gtfs_gpkg = f'../process/data/GTFS/gtfs_frequent_transit_headway_{gtfs_analysis_date}_python.gpkg'

points_in_polys = {}
for city in hexes.keys():
    _city_ = city.lower().replace(' ','_')
    if GTFS[_city_]==[]:
        # Empty GTFS entry for this city (presumably no feed was available -- confirm
        # against gtfs_config.py): use the OSM-derived stops from the city geopackage.
        transport_data = f"../process/data/output/{city_data['gpkgNames'][_city_]}"
        osm_layer = 'destinations'
        stops = gpd.read_file(transport_data, layer=osm_layer)
        stops = stops.query('dest_name_full =="Public transport stop (any)"')
    else:
        # Use the stops layer named after the last configured feed's date range.
        gtfs_layer = f"{_city_}_stops_headway_{GTFS[_city_][-1]['start_date_mmdd']}_{GTFS[_city_][-1]['end_date_mmdd']}"
        stops = gpd.read_file(gtfs_gpkg, layer=gtfs_layer)

    # Remove the outputs of a previous run of this cell; otherwise the join below
    # raises on the overlapping 'pt_stops' column.
    hexes[city] = hexes[city].drop(columns=['pt_stops', 'pt_stops_per_sqkm'], errors='ignore')

    # NOTE(review): newer geopandas releases rename `op` to `predicate`; switch the
    # keyword when the environment is upgraded.
    stops_in_hexes = gpd.sjoin(stops, hexes[city], how='left', op='within')

    # 'index_right' is the row label of the containing hexagon; stops outside every
    # hexagon have no match and are dropped before counting.
    hex_of_stop = stops_in_hexes['index_right'].dropna().astype(int)
    stop_counts = hex_of_stop.reset_index().groupby('index_right').count().reset_index()
    stop_counts.columns = ['index','pt_stops']
    points_in_polys[city] = stop_counts

    # Attach the counts by hexagon row label; hexagons without stops are left as NaN.
    hexes[city] = hexes[city].join(stop_counts.set_index('index'), how='left')
    hexes[city]['pt_stops_per_sqkm'] = hexes[city]['pt_stops']/hexes[city]['area_sqkm']
                                          

In [42]:
# Analysis set up

# Human-readable description of each scenario, keyed by scenario code.
scenarios = dict([
    ('A', '≥80% probability of engaging in walking for transport'),
    ('B', 'reaching the WHO’s target of a ≥15% relative reduction in insufficient physical activity through walking'),
    ('distances', 'distances to destinations, measured up to a maximum distance target threshold of 500 metres'),
])

# Histogram styling for each scenario's interval (shaded colour and boundary line style).
scenario_style = {
    'A': dict(colour='lightcyan', line='dashed'),
    'B': dict(colour='palegreen', line='solid'),
    'distances': dict(colour='lightcyan', line='dashed'),
}

greq = '≥'

# Indicator definitions, keyed by the indicator's display title.  Per indicator:
#   data            - the geopackage the variable comes from ('hexes' or 'points')
#   layer           - (points only) layer to read from the city geopackage
#   point_function  - (optional) function combining several point variables row-wise
#   variable        - variable name; a list is required if a function is specified
#   truncate_cutoff - distance measures are only formally measured up to 500m, however
#                     truncation at 500 is required for neatness when plotting the
#                     continuous distribution due to the full distance measurement method
#   polarity        - which is better: more ('positive') or less ('negative')?
#   scenarios       - per scenario: 'threshold' (not used; the interval is plotted),
#                     'comparison' (direction in which to evaluate success, e.g. is the
#                     aim to be greater than or less than the threshold?), 'interval'
#                     and 'interval_type'
thresholds = {
    'Mean 1000 m neighbourhood population per km²': {
        'data': 'hexes',
        'variable': 'local_nh_population_density',
        'polarity': 'positive',
        'scenarios': {
            'A': dict(
                threshold=5665,
                comparison='>',
                interval=(4790, 6750),
                interval_type='95% CrI',
            ),
            'B': dict(
                threshold=6491,
                comparison='>',
                interval=(5677, 7823),
                interval_type='95% CrI',
            ),
        },
    },
    'Mean 1000 m neighbourhood street intersections per km²': {
        'data': 'hexes',
        'variable': 'local_nh_intersection_density',
        'polarity': 'positive',
        'scenarios': {
            'A': dict(
                threshold=98,
                comparison='>',
                interval=(90, 110),
                interval_type='95% CrI',
            ),
            'B': dict(
                threshold=122,
                comparison='>',
                interval=(106, 156),
                interval_type='95% CrI',
            ),
        },
    },
    'Distance to nearest public transport stops (m; up to 500m)': {
        'data': 'points',
        'layer': 'samplePointsData',
        # take the minimum of the OSM and GTFS pt data sources
        'point_function': min,
        'variable': [
            'sp_nearest_node_pt_osm_any',
            'sp_nearest_node_pt_gtfs_any',
        ],
        'truncate_cutoff': 500,
        # shorter distance is assumed to be better, so polarity is negative
        'polarity': 'negative',
        'scenarios': {
            'distances': dict(
                threshold=400,
                comparison='<',
                interval=(300, 500),
                interval_type='distance (m)',
                # statistic='pop_pct_access_500m_pt_any_binary',
            ),
        },
    },
    'Distance to nearest park (m; up to 500m)': {
        'data': 'points',
        'layer': 'samplePointsData',
        'variable': 'sp_nearest_node_public_open_space_any',
        'truncate_cutoff': 500,
        'polarity': 'negative',
        'scenarios': {
            'distances': dict(
                threshold=400,
                comparison='<',
                interval=(300, 500),
                interval_type='distance (m)',
                # statistic='pop_pct_access_500m_public_open_space_any_binary',
            ),
        },
    },
}



In [151]:
# Build one threshold summary PDF per city (an indicator map, one categorical map per
# scenario, and a histogram for every indicator) and collate, across all cities, the
# estimated percentage of population falling in each threshold category.
fontprops = fm.FontProperties(size=8)

# Distances beyond the cutoff (or missing) are recoded to a single sentinel value so
# they can be classified and plotted as one '> 500 m' category.  The histogram bins,
# ticks and '>' marker further down are laid out for these two values.
distance_cutoff = 500
beyond_cutoff = 650

# Collected across ALL cities, in first-seen order and without duplicates.  The set of
# category labels can differ between cities (a city whose values never fall below an
# interval has no 'below' label), so resetting this list per city would make the final
# table contain only the labels of whichever city was processed last.
statistics = []


def _record_statistic(name):
    """Add ``name`` to the global ``statistics`` list unless it is already present."""
    if name not in statistics:
        statistics.append(name)


def _ignoring_missing(function):
    """Wrap ``function`` so it is applied to the non-missing values of a row only.

    Python's builtin ``min``/``max`` give order-dependent results when NaN is present
    (``min([nan, 5])`` is nan but ``min([5, nan])`` is 5), so missing values are removed
    before the call.  Rows with no valid value at all yield NaN.
    """
    def apply_to_row(row):
        valid = row.dropna()
        return function(valid) if len(valid) > 0 else np.nan
    return apply_to_row


def _new_region_map(study_region, fontprops):
    """Return ``(fig, ax)`` for an 11.69 x 8.27 inch (A4 landscape) map page showing
    the study region outline, with axis ticks hidden and a 1000 m scale bar."""
    fig, ax = plt.subplots(1, 1, figsize=(11.69,8.27))
    ax.set_aspect('equal')
    study_region.plot(ax=ax, color='none', edgecolor='black', zorder=2)
    ax.set_xticks([])
    ax.set_yticks([])
    scalebar = AnchoredSizeBar(ax.transData,
                               1000, '1000 m', 'lower right',
                               pad= .01,
                               color='black',
                               frameon=False,
                               fontproperties=fontprops)
    ax.add_artist(scalebar)
    return fig, ax


# create the output directory once (no error if it already exists)
os.makedirs('./reports', exist_ok=True)

# for city in ['Odense']:
for city in hexes.keys():
    print(city)
    study_region = cities.query(f'index=="{city}"').to_crs(hexes[city].crs).copy()
    bounds = study_region.bounds
    width = (bounds['maxx'].values[0]-bounds['minx'].values[0])
    height = (bounds['maxy'].values[0]-bounds['miny'].values[0])
    # create a PdfPages object for file output
    with PdfPages(f'reports/{city}_threshold_summary.pdf') as pdf:
        for indicator, spec in thresholds.items():
            data = spec['data']
            variable = spec['variable']
            polarity = spec['polarity']
            # scenarios defined for this indicator, in the order of `scenarios`
            active_scenarios = [s for s in scenarios.keys() if s in spec['scenarios']]
            is_distance = 'distances' in active_scenarios
            # adjust colour scales for indicator polarities
            if polarity == 'negative':
                cmap = 'viridis'
                cmap_r = 'viridis_r'
            else:
                cmap = 'viridis_r'
                cmap_r = 'viridis'

            # Aggregate point data (e.g. distances) to hexes
            if data == 'points':
                layer = spec['layer']
                points = gpd.read_file(f"../process/data/output/{city_data['gpkgNames'][city.lower().replace(' ','_')]}",layer=layer)
                if 'point_function' in spec:
                    combined = ''.join(variable)
                    points[combined] = points[variable].apply(_ignoring_missing(spec['point_function']), axis=1)
                    variable = combined
                if is_distance:
                    # fix distances > 500m or NA to 650m, to facilitate classification
                    # and plotting of the '> 500m' category
                    points[variable] = points[variable].where(points[variable] <= distance_cutoff, beyond_cutoff)
                    hex_means = points.groupby('hex_id')[variable].mean().reset_index()
                    # ensure this hex variable doesn't exist, eg as a result of debugging code
                    hexes[city] = hexes[city].drop(columns=[variable], errors='ignore')
                    # A left merge keeps exactly one row per hexagon in the original
                    # order, so the values can be assigned positionally.  (An inner
                    # merge drops hexagons without sample points and renumbers the
                    # rows, which shifts values onto the wrong hexagons.)
                    hexes[city][variable] = hexes[city][['index']].merge(hex_means,
                                                                         how='left',
                                                                         left_on='index',
                                                                         right_on='hex_id')[variable].to_numpy()
                data = 'hexes'

            # Process maps for indicators using the hex data
            if data == 'hexes':
                if 'hex_function' in spec:
                    hexes[city][variable] = hexes[city][variable].apply(spec['hex_function'], axis=1)

                # Exact (unrounded) extremes: they become the outer bin edges below, and
                # a rounded edge could exclude the most extreme hexagons from every bin.
                observed = hexes[city][variable].dropna()
                var_min = observed.min()
                var_max = observed.max()

                # map main indicator
                fig, ax = _new_region_map(study_region, fontprops)
                cax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.1)
                fig.suptitle("\n".join(wrap(indicator, 120 )))
                if is_distance:
                    mapped = hexes[city].query(f'{variable} < {distance_cutoff}')
                else:
                    mapped = hexes[city]
                mapped.plot(column=variable, ax=ax, legend=True, cax=cax, cmap=cmap, zorder=1)
                ax.set_rasterized(True)
                pdf.savefig(fig,dpi=200)
                plt.close(fig)

                # map scenarios using custom splits
                interval_splits = {}
                for scenario in active_scenarios:
                    attributes = spec['scenarios'][scenario]
                    split_labels = []
                    # categorical distribution plots for meeting scenarios
                    if 'interval' in attributes:
                        interval = attributes['interval']
                        interval_type = attributes['interval_type']
                        # the builtins `max` / `min` may be used as open-ended bounds
                        if max in interval:
                            interval = [x if x!=max else var_max for x in interval]
                        if min in interval:
                            interval = [x if x!=min else var_min for x in interval]
                        interval_splits[scenario] = list(interval)
                        split_labels = [f'within {interval_type} {interval}']
                        bins = list(interval)
                        if var_min < bins[0]:
                            bins = [var_min]+bins
                            split_labels = [f'below {interval_type} lower bound']+split_labels
                        if var_max > bins[-1]:
                            bins = bins+[var_max]
                            split_labels = split_labels+[f'exceeds {interval_type} upper bound']
                        scenario_column = f'{variable}_{scenario}'
                        # include_lowest: bins are left-open by default, which would
                        # leave hexagons at the minimum value unclassified
                        hexes[city][scenario_column] = pd.cut(hexes[city][variable],
                                                              bins=bins,
                                                              labels=split_labels,
                                                              include_lowest=True)

                        fig, ax = _new_region_map(study_region, fontprops)
                        fig.suptitle("\n".join(wrap(f'{scenario}: Estimated {indicator} requirement for {scenarios[scenario]}', 120 )))
                        if 'notes' in attributes:
                            ax.set_title(f"{attributes['notes']}")
                        # NOTE(review): the legend offset depends only very weakly on
                        # `height` (height**.001 is close to 1) -- confirm intent
                        hexes[city].plot(column=scenario_column, ax=ax, legend=True, cmap=cmap, zorder=1,
                                         legend_kwds={'borderaxespad':-4-height**.001, 'loc':'lower center'})
                        ax.set_rasterized(True)
                        pdf.savefig(fig, dpi=200)
                        plt.close(fig)

                    if 'statistic' in attributes:
                        _record_statistic(attributes['statistic'])
                    elif 'interval' in attributes:
                        # Estimated percentage of population meeting indicator threshold
                        percentages = (100*hexes[city]
                                       .groupby(scenario_column, observed=False)['pop_est']
                                       .sum()
                                       /hexes[city]['pop_est'].sum()).round(1)
                        for label in split_labels:
                            statistic = f'pop_pct_{scenario} - {indicator} - {label}'
                            # a category with no entry contributes 0%
                            cities.loc[city,statistic] = percentages.get(label, 0)
                            _record_statistic(statistic)

                # histogram of the indicator, on its own explicitly created figure
                fig, ax = plt.subplots(figsize=(11.69,8.27))
                if is_distance:
                    # histogram of distances (including NaN as > 500m, along with other > 500m)
                    hexes[city][variable].where(hexes[city][variable] <= distance_cutoff, beyond_cutoff)\
                                         .plot.hist(ax=ax, grid=False, bins = range(0, 700,50),xticks=range(0, 600,100),align='mid')
                    ax.text(618,0,">",verticalalignment='center')
                else:
                    hexes[city][variable].hist(ax=ax, grid=False)

                fig.suptitle("\n".join(wrap(f'Histogram of {indicator}.',120)))

                subtitle = []
                for scenario in active_scenarios:
                    attributes = spec['scenarios'][scenario]
                    if 'interval' in attributes:
                        style = scenario_style[scenario]
                        subtitle.append(f'\n\n{scenario}: {attributes["interval_type"]} {attributes["interval"]}; {style["colour"]}, {style["line"]}')
                        # mark both interval bounds and shade the interval itself
                        for bound in interval_splits[scenario]:
                            ax.axvline(bound, color='k', linestyle=style["line"], linewidth=1)
                        ax.axvspan(*interval_splits[scenario], color=style["colour"],alpha=0.6, zorder=2)
                if subtitle:
                    ax.set_title('\n'.join(['\n'.join(wrap(x,120)) for x in subtitle]))
                ax.set_ylabel("Frequency")
                pdf.savefig(fig)
                plt.close(fig)

    plt.close('all')

# Summary table: one row per statistic, one column per city; categories a city does not
# have are reported as 0.
summary = cities[statistics].fillna(0).transpose()
summary.to_csv('./reports/Global Indicators 2020 - thresholds summary estimates.csv')
summary

Maiduguri
Mexico City
Baltimore
Phoenix
Seattle
Sao Paulo
Hong Kong
Chennai
Bangkok
Hanoi
Adelaide
Melbourne
Sydney
Auckland
Graz
Ghent
Olomouc
Odense
Cologne
Lisbon
Barcelona
Valencia
Vic
Bern
Belfast


City,Maiduguri,Mexico City,Baltimore,Phoenix,Seattle,Sao Paulo,Hong Kong,Chennai,Bangkok,Hanoi,...,Ghent,Olomouc,Odense,Cologne,Lisbon,Barcelona,Valencia,Vic,Bern,Belfast
pop_pct_A - Mean 1000 m neighbourhood population per km² - below 95% CrI lower bound,2.0,1.1,60.5,69.9,89.1,0.4,1.7,0.2,1.8,4.2,...,100.0,100.0,94.0,52.5,1.9,4.5,2.1,52.8,17.8,40.3
"pop_pct_A - Mean 1000 m neighbourhood population per km² - within 95% CrI (4790, 6750)",5.6,1.9,21.5,24.4,6.5,0.5,1.3,0.4,2.9,6.1,...,0.0,0.0,6.0,47.5,3.7,5.5,3.6,43.1,70.3,42.0
pop_pct_A - Mean 1000 m neighbourhood population per km² - exceeds 95% CrI upper bound,92.4,97.0,18.1,5.7,4.4,99.1,97.0,99.4,95.3,89.6,...,0.0,0.0,0.0,0.0,94.4,90.0,94.2,4.0,11.8,17.6
pop_pct_B - Mean 1000 m neighbourhood population per km² - below 95% CrI lower bound,4.1,1.9,72.0,84.3,93.6,0.6,2.3,0.4,3.0,7.0,...,100.0,100.0,100.0,78.4,3.1,7.6,4.1,75.7,41.7,59.8
"pop_pct_B - Mean 1000 m neighbourhood population per km² - within 95% CrI (5677, 7823)",7.8,2.5,16.4,14.0,3.0,0.7,1.3,0.5,3.8,7.5,...,0.0,0.0,0.0,21.6,5.3,5.0,2.9,24.3,58.3,37.6
pop_pct_B - Mean 1000 m neighbourhood population per km² - exceeds 95% CrI upper bound,88.1,95.6,11.6,1.7,3.4,98.7,96.4,99.2,93.2,85.5,...,0.0,0.0,0.0,0.0,91.5,87.3,92.9,0.0,0.0,2.5
pop_pct_A - Mean 1000 m neighbourhood street intersections per km² - below 95% CrI lower bound,54.4,10.4,35.1,25.6,38.7,12.1,4.3,9.6,38.5,32.4,...,32.5,31.0,5.1,16.1,0.2,17.4,21.3,34.7,0.8,9.0
"pop_pct_A - Mean 1000 m neighbourhood street intersections per km² - within 95% CrI (90, 110)",21.3,14.6,16.1,29.4,22.0,23.3,5.8,14.8,27.5,15.1,...,16.6,18.6,13.4,16.6,1.9,10.5,7.7,11.9,1.4,21.1
pop_pct_A - Mean 1000 m neighbourhood street intersections per km² - exceeds 95% CrI upper bound,24.3,75.0,48.7,45.0,39.3,64.7,89.9,75.6,34.0,52.5,...,50.9,50.4,81.4,67.3,97.8,72.1,70.9,53.4,97.9,70.0
pop_pct_B - Mean 1000 m neighbourhood street intersections per km² - below 95% CrI lower bound,71.5,21.4,48.2,49.1,56.8,29.6,8.5,20.7,60.4,43.7,...,45.1,45.8,14.6,28.4,1.3,25.1,27.6,43.6,1.8,26.0
