# Visualization and data analysis of output indicators 

This notebook presents data visualization and analysis for output indicators from the Global indicator project.   
The analyses consist of two major components:  
   1. Within-city variations
    - Show maps of walkability indicators for all cities and do a visual sanity check to see if any issue occurs
    - Interpret the within-city variation patterns
    - Pick one or two cities as examples, plot and compare different indicators, and interpret the within-city variations and whether they reflect the real-world situation

   2. Between-city analysis
    - Show tables of measurements and raw indicator values, rank cities from highest to lowest, and interpret the results
    - Plot on a world map using graduated symbols or colors to visualize and compare indicators across cities
    - Create box plots to compare median statistics across cities
    - We could perhaps do analyses similar to the policy indicator analyses, color-coding cities based on the Lancet study thresholds?
    

**Note: Refer to the [workflow documentation](https://github.com/gboeing/global-indicators/blob/master/documentation/workflow.md) for indicators tables and description**
    
    

In [9]:
import geopandas as gpd
import os
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

%matplotlib inline

In [10]:
# Output folder and resolution used for every figure saved by this notebook.
image_path = './images'
dpi = 300

# savefig does not create missing directories, so make sure the output folder
# exists before any plotting cell tries to write into it.
os.makedirs(image_path, exist_ok=True)

# Location of the processing pipeline and of its city configuration file.
process_folder = '../process'
process_config_path = '../process/configuration/cities.json'

In [11]:
# Parse the processing configuration; it names the folders and output files.
with open(process_config_path) as json_file:
    config = json.load(json_file)

# Both data folders live under the process folder.
output_folder, input_folder = (
    os.path.join(process_folder, config[key])
    for key in ('folder', 'input_folder')
)

# Paths of "global_indicators_hex_250m.gpkg" and "global_indicators_city.gpkg",
# both located inside the output folder.
gpkgOutput_hex250, gpkgOutput_cities = (
    os.path.join(output_folder, config[key])
    for key in ('output_hex_250m', 'global_indicators_city')
)

In [12]:
# Study cities, identified by the layer names used in the output geopackages.
# The list order is the order in which the cities are processed and plotted.
cities = [
    'adelaide', 'auckland', 'baltimore', 'bangkok',
    'barcelona', 'belfast', 'bern', 'chennai',
    'mexico_city', 'cologne', 'ghent', 'graz',
    'hanoi', 'hong_kong', 'lisbon', 'melbourne',
    'odense', 'olomouc', 'sao_paulo', 'phoenix',
    'seattle', 'sydney', 'valencia', 'vic',
]

In [13]:
# Human-readable display name for each city key; used for the panel titles.
city_names = {
    'adelaide': 'Adelaide',
    'auckland': 'Auckland',
    'baltimore': 'Baltimore',
    'bangkok': 'Bangkok',
    'barcelona': 'Barcelona',
    'belfast': 'Belfast',
    'bern': 'Bern',
    'chennai': 'Chennai',
    'mexico_city': 'Mexico City',
    'cologne': 'Cologne',
    'ghent': 'Ghent',
    'graz': 'Graz',
    'hanoi': 'Hanoi',
    'hong_kong': 'Hong Kong',
    'lisbon': 'Lisbon',
    'melbourne': 'Melbourne',
    'odense': 'Odense',
    'olomouc': 'Olomouc',
    # was misspelled 'Sau Paulo', which ended up in the figure titles
    'sao_paulo': 'São Paulo',
    'phoenix': 'Phoenix',
    'seattle': 'Seattle',
    'sydney': 'Sydney',
    'valencia': 'Valencia',
    'vic': 'Vic',
}

In [14]:
# Sanity check: look up the display name of every city key and print it.
# A key missing from city_names raises KeyError here.
for city in cities:
    print(city_names[city])

Adelaide
Auckland
Baltimore
Bangkok
Barcelona
Belfast
Bern
Chennai
Mexico City
Cologne
Ghent
Graz
Hanoi
Hong Kong
Lisbon
Melbourne
Odense
Olomouc
Sau Paulo
Phoenix
Seattle
Sydney
Valencia
Vic


In [15]:
# --- choropleth classification and colours ---
scheme = 'NaturalBreaks'
scheme2 = 'UserDefined'
k = 5
cmap = 'plasma'
edgecolor = 'none'

# --- city boundary outline: unfilled polygon with a thin white edge ---
city_color = 'none'
city_edge = 'w'
city_edge_lw = 0.2

# --- figure title ---
title_y = 1.02
title_fontsize = 16
title_weight = 'bold'

# --- global matplotlib text/tick colour (white) ---
fontcolor = 'w'
params = dict.fromkeys(("text.color", "ytick.color", "xtick.color"), fontcolor)
plt.rcParams.update(params)

## Master Dataframe

In [16]:
def _read_hex250_layer(layer):
    """Read one city's layer from the 250 m hexagon indicator geopackage."""
    return gpd.read_file(gpkgOutput_hex250, layer=layer)


# One GeoDataFrame per study city, each bound to its own top-level name.
hex250_adelaide = _read_hex250_layer('adelaide')
hex250_auckland = _read_hex250_layer('auckland')
hex250_baltimore = _read_hex250_layer('baltimore')
hex250_bangkok = _read_hex250_layer('bangkok')
hex250_barcelona = _read_hex250_layer('barcelona')
hex250_belfast = _read_hex250_layer('belfast')
hex250_bern = _read_hex250_layer('bern')
hex250_chennai = _read_hex250_layer('chennai')
hex250_mexico_city = _read_hex250_layer('mexico_city')
hex250_cologne = _read_hex250_layer('cologne')
hex250_ghent = _read_hex250_layer('ghent')
hex250_graz = _read_hex250_layer('graz')
hex250_hanoi = _read_hex250_layer('hanoi')
hex250_hong_kong = _read_hex250_layer('hong_kong')
hex250_lisbon = _read_hex250_layer('lisbon')
hex250_melbourne = _read_hex250_layer('melbourne')
hex250_odense = _read_hex250_layer('odense')
hex250_olomouc = _read_hex250_layer('olomouc')
hex250_sao_paulo = _read_hex250_layer('sao_paulo')
hex250_phoenix = _read_hex250_layer('phoenix')
hex250_seattle = _read_hex250_layer('seattle')
hex250_sydney = _read_hex250_layer('sydney')
hex250_valencia = _read_hex250_layer('valencia')
hex250_vic = _read_hex250_layer('vic')

DriverError: ../process/data/output/global_indicators_hex_250m.gpkg: No such file or directory

In [None]:
# Public-transport (GTFS) access columns removed from the five cities below.
# NOTE(review): presumably these cities have no usable GTFS data - confirm.
_gtfs_access_columns = [
    'pct_access_500m_pt_gtfs_any_binary',
    'pct_access_500m_pt_gtfs_freq_20_binary',
    'pct_access_500m_pt_gtfs_freq_30_binary',
]

# Each drop raises KeyError if the columns are absent, so this cell cannot be
# re-run without first reloading the city frames.
hex250_bangkok = hex250_bangkok.drop(columns=_gtfs_access_columns)
hex250_ghent = hex250_ghent.drop(columns=_gtfs_access_columns)
hex250_graz = hex250_graz.drop(columns=_gtfs_access_columns)
hex250_olomouc = hex250_olomouc.drop(columns=_gtfs_access_columns)
hex250_vic = hex250_vic.drop(columns=_gtfs_access_columns)

In [None]:
# Per-city hexagon frames, listed in the same order as `cities`.
city_data_list = [
    hex250_adelaide,
    hex250_auckland,
    hex250_baltimore,
    hex250_bangkok,
    hex250_barcelona,
    hex250_belfast,
    hex250_bern,
    hex250_chennai,
    hex250_mexico_city,
    hex250_cologne,
    hex250_ghent,
    hex250_graz,
    hex250_hanoi,
    hex250_hong_kong,
    hex250_lisbon,
    hex250_melbourne,
    hex250_odense,
    hex250_olomouc,
    hex250_sao_paulo,
    hex250_phoenix,
    hex250_seattle,
    hex250_sydney,
    hex250_valencia,
    hex250_vic,
]

In [None]:
# Stack every city's hexagons into one master table (row-wise concatenation).
city_data = pd.concat(objs=city_data_list)

## Within-city hex-level walkability maps (weighted by natural breaks)

In [None]:
%%time
col = 'all_cities_walkability'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)

    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2, 
                         classification_kwds={'bins':[-4.34, -2, -1, 1, 4]},
                         k=k, cmap=cmap, edgecolor=edgecolor, label=city, legend=False, legend_kwds=None)

    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Within-City Walkability Index', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-walkability.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()

In [None]:
%%time
col = 'pct_access_500m_public_open_space_any_binary'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)
    
    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2,
                         classification_kwds={'bins':[0.00, 13.51, 40.62, 67.06, 89.47]}, 
                         k=k, cmap=cmap, edgecolor=edgecolor,
                         label=city, legend=False, legend_kwds=None)

    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Access to Any Public Open Space', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-openspace-any.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()

In [None]:
%%time
col = 'pct_access_500m_public_open_space_large_binary'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)
    
    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2,
                         classification_kwds={'bins':[0.00, 12.31, 37.50, 63.48, 87.62]}, 
                         k=k, cmap=cmap, edgecolor=edgecolor,
                         label=city, legend=False, legend_kwds=None)

    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Access to Large Public Open Space', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-openspace-large.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()

In [None]:
%%time
col = 'pct_access_500m_pt_gtfs_any_binary'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)
    
    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2,
                          classification_kwds={'bins':[0.00, 13.64, 40.48, 66.00, 88.79]}, 
                          k=k, cmap=cmap, edgecolor=edgecolor,
                          label=city, legend=False, legend_kwds=None)

    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Access to Any Transit', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-transit-any.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()

In [None]:
%%time
col = 'pct_access_500m_pt_gtfs_freq_20_binary'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)
    
    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2,
                          classification_kwds={'bins':[0.00, 13.04, 39.53, 65.45, 88.57]}, 
                          k=k, cmap=cmap, edgecolor=edgecolor,
                          label=city, legend=False, legend_kwds=None)
 
    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Access to Transit at 20 Minute Frequency', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-transit-20.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()

In [None]:
%%time
col = 'pct_access_500m_pt_gtfs_freq_30_binary'
fig, axes = plt.subplots(nrows=6, ncols=4, figsize=(8, 8), facecolor='k')

for count, (ax, city) in enumerate(zip(axes.flatten(), cities)):
    print(count, city, end=' ')
    hex250 = gpd.read_file(gpkgOutput_hex250, layer=city)
    city_bound = gpd.read_file(gpkgOutput_cities, layer=city)
    
    #plot indicators
    if hex250[col].sum() == 0:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
    else:
        ax = city_bound.plot(ax=ax, color=city_color, edgecolor=city_edge, linewidth=city_edge_lw)
        ax = hex250.plot(ax=ax, column=col, scheme=scheme2,
                          classification_kwds={'bins':[0.00, 13.64, 41.18, 67.50, 89.58]}, 
                          k=k, cmap=cmap, edgecolor=edgecolor,
                          label=city, legend=False, legend_kwds=None)

    ax.set_title(city_names[city], color=fontcolor, fontsize=10)
    ax.set_axis_off()

# add a title to the figure
fig.suptitle('Access to Transit at 30 Minute Frequency', y=title_y, fontsize=title_fontsize, weight=title_weight)
fig.tight_layout()

save_path = os.path.join(image_path, 'map-transit-30.png')
fig.savefig(save_path, dpi=dpi, bbox_inches='tight', facecolor=fig.get_facecolor())
plt.close()