In [1]:
import geopandas as gpd
import json
import matplotlib.pyplot as plt
import numpy as np
import osmnx as ox
import pandas as pd
from geopandas.plotting import _flatten_multi_geoms as flatten
from scipy import stats

# input data locations, relative to the notebook's working directory
indicators_path = 'data/tracts_indicators_grades_eras_index.csv'
tracts_path = 'data/tracts_shapefile'

# EPSG:4326 coordinate reference system, given as a plain authority string:
# the {'init': 'epsg:4326'} dict form is deprecated by pyproj and emits a
# FutureWarning every time the CRS is constructed
crs = 'epsg:4326'

In [2]:
# load the FIPS code -> state info lookup
with open('data/states_by_fips.json') as f:
    fips_to_state = json.load(f)

# keep the FIPS codes of every entry whose abbreviation is not one of these
excluded_abbrevs = ['GU', 'AS', 'CZ', 'VI', 'PR']
states = [fips for fips in fips_to_state
          if fips_to_state[fips]['abbreviation'] not in excluded_abbrevs]
len(states)

51

In [3]:
# read the tract indicators, forcing geoid to be parsed as a string
geoid_dtype = {'geoid': str}
indicators = pd.read_csv(indicators_path, dtype=geoid_dtype)
indicators.shape[0]

72663

In [4]:
# read the tract geometries and order them by their GEOID
tracts = gpd.read_file(tracts_path, crs=crs)
tracts = tracts.sort_values(by='GEOID')
tracts.shape[0]

74133

In [5]:
# right merge: keep every tract geometry, attaching indicators where the IDs match
merged = indicators.merge(tracts, how='right', left_on='geoid', right_on='GEOID')
gdf = gpd.GeoDataFrame(merged, crs=crs)
gdf.shape[0]

  return _prepare_from_string(" ".join(pjargs))


74133

In [6]:
# only retain tracts in 50 states + DC
in_retained_states = gdf['STATEFP'].isin(states)
gdf = gdf[in_retained_states]

# only retain tracts with at least 1 network node
#gdf = gdf[gdf['n'] > 0]
gdf.shape[0]

73056

In [7]:
# response column(s) to check and fill; count their missing values
responses = ['grid_index']
gdf[responses].isnull().sum()

grid_index    397
dtype: int64

In [8]:
# treat infinite values as missing, then recount the missing responses
gdf = gdf.replace(to_replace=[np.inf, -np.inf], value=np.nan)
gdf[responses].isnull().sum()

grid_index    397
dtype: int64

In [9]:
# order tracts by the shapefile's GEOID, not the indicators' geoid: after the right
# merge, tracts without an indicators row have a null geoid, so sorting on geoid would
# push them all to the end rather than placing them between their neighboring tract IDs
gdf = gdf.sort_values('GEOID')

# fill missing responses by interpolating along the GEOID-sorted rows
gdf[responses] = gdf[responses].interpolate() #better to do spatial interpolation with pysal spatial lag
pd.isnull(gdf[responses]).sum()

grid_index    0
dtype: int64

In [10]:
# if all else fails, fill missing values with median value
response_medians = gdf[responses].median()
gdf[responses] = gdf[responses].fillna(response_medians)
gdf[responses].isnull().sum()

grid_index    0
dtype: int64

In [11]:
# row count after filling the missing responses
gdf.shape[0]

73056

## Trend over time

In [12]:
# columns whose name contains 'dummy_majority': print per-column sums, show the grand total
cols_majority = list(filter(lambda name: 'dummy_majority' in name, gdf.columns))
counts_majority = gdf[cols_majority].sum()
print(counts_majority)
counts_majority.sum()

dummy_majority_prop_1939_earlier    5751.0
dummy_majority_prop_1940_49           85.0
dummy_majority_prop_1950_59         1215.0
dummy_majority_prop_1960_69          544.0
dummy_majority_prop_1970_79         1147.0
dummy_majority_prop_1980_89          984.0
dummy_majority_prop_1990_99         1050.0
dummy_majority_prop_2000_09         1817.0
dummy_majority_prop_2010_later        93.0
dtype: float64


12686.0

In [13]:
# columns whose name contains 'dummy_plurality': print per-column sums, show the grand total
cols_plurality = list(filter(lambda name: 'dummy_plurality' in name, gdf.columns))
counts_plurality = gdf[cols_plurality].sum()
print(counts_plurality)
counts_plurality.sum()

dummy_plurality_prop_1939_earlier    18096.0
dummy_plurality_prop_1940_49          1139.0
dummy_plurality_prop_1950_59          8546.0
dummy_plurality_prop_1960_69          5356.0
dummy_plurality_prop_1970_79         12418.0
dummy_plurality_prop_1980_89          7377.0
dummy_plurality_prop_1990_99          9115.0
dummy_plurality_prop_2000_09          9623.0
dummy_plurality_prop_2010_later        555.0
dtype: float64


72225.0

In [14]:
# columns whose name contains 'dummy_earliest': print per-column sums, show the grand total
cols_earliest = list(filter(lambda name: 'dummy_earliest' in name, gdf.columns))
counts_earliest = gdf[cols_earliest].sum()
print(counts_earliest)
counts_earliest.sum()

dummy_earliest_prop_1939_earlier    19552.0
dummy_earliest_prop_1940_49          1636.0
dummy_earliest_prop_1950_59          8510.0
dummy_earliest_prop_1960_69          5730.0
dummy_earliest_prop_1970_79         12689.0
dummy_earliest_prop_1980_89          6986.0
dummy_earliest_prop_1990_99          8242.0
dummy_earliest_prop_2000_09          4715.0
dummy_earliest_prop_2010_later        159.0
dtype: float64


68219.0

In [15]:
# columns whose name contains 'dummy_cumulative': print per-column sums, show the grand total
cols_cumulative = list(filter(lambda name: 'dummy_cumulative' in name, gdf.columns))
counts_cumulative = gdf[cols_cumulative].sum()
print(counts_cumulative)
counts_cumulative.sum()

dummy_cumulative_prop_1939_earlier     5751.0
dummy_cumulative_prop_1940_49          3913.0
dummy_cumulative_prop_1950_59          9646.0
dummy_cumulative_prop_1960_69         10479.0
dummy_cumulative_prop_1970_79         16311.0
dummy_cumulative_prop_1980_89         13813.0
dummy_cumulative_prop_1990_99          8624.0
dummy_cumulative_prop_2000_09          3596.0
dummy_cumulative_prop_2010_later         92.0
dtype: float64


72225.0

In [16]:
# columns whose name contains 'dummy_primary': print per-column sums, show the grand total
cols_primary = list(filter(lambda name: 'dummy_primary' in name, gdf.columns))
counts_primary = gdf[cols_primary].sum()
print(counts_primary)
counts_primary.sum()

dummy_primary_prop_1939_earlier    21151.0
dummy_primary_prop_1940_49          1550.0
dummy_primary_prop_1950_59          9232.0
dummy_primary_prop_1960_69          5645.0
dummy_primary_prop_1970_79         12368.0
dummy_primary_prop_1980_89          6951.0
dummy_primary_prop_1990_99          7800.0
dummy_primary_prop_2000_09          7160.0
dummy_primary_prop_2010_later        368.0
dtype: float64


72225.0

In [17]:
# columns whose name contains 'dummy_ztrax': print per-column sums, show the grand total
cols_ztrax = list(filter(lambda name: 'dummy_ztrax' in name, gdf.columns))
counts_ztrax = gdf[cols_ztrax].sum()
print(counts_ztrax)
counts_ztrax.sum()

dummy_ztrax_prop_1939_earlier    20528.0
dummy_ztrax_prop_1940_49          4976.0
dummy_ztrax_prop_1950_59          8846.0
dummy_ztrax_prop_1960_69          9269.0
dummy_ztrax_prop_1970_79         10859.0
dummy_ztrax_prop_1980_89          8807.0
dummy_ztrax_prop_1990_99          3713.0
dummy_ztrax_prop_2000_09          1412.0
dummy_ztrax_prop_2010_later        208.0
dtype: float64


68618.0

In [18]:
# columns whose name contains 'dummy_prim_ztrax': print per-column sums, show the grand total
cols_prim_ztrax = list(filter(lambda name: 'dummy_prim_ztrax' in name, gdf.columns))
counts_prim_ztrax = gdf[cols_prim_ztrax].sum()
print(counts_prim_ztrax)
counts_prim_ztrax.sum()

dummy_prim_ztrax_prop_1939_earlier    27826.0
dummy_prim_ztrax_prop_1940_49          4411.0
dummy_prim_ztrax_prop_1950_59          9058.0
dummy_prim_ztrax_prop_1960_69          8224.0
dummy_prim_ztrax_prop_1970_79         11625.0
dummy_prim_ztrax_prop_1980_89          7169.0
dummy_prim_ztrax_prop_1990_99          3164.0
dummy_prim_ztrax_prop_2000_09          1094.0
dummy_prim_ztrax_prop_2010_later         34.0
dtype: float64


72605.0

In [19]:
# the response variable analyzed throughout the notebook
response = 'grid_index'

# how the primary era of development is designated, and the matching dummy columns
type_primary = 'primary' #either 'majority' or 'plurality' or 'earliest' to designate primary era of development
cols_primary = cols_primary #either majority or plurality or earliest, match type_primary

# what variables to visualize time series trends for, and their labels
variables = {
    response: 'Grid Index',
    'orientation_order': 'Orientation Order',
    'straightness': 'Average Straightness',
    'prop_4way': '4-Way Intersection Proportion',
    'prop_deadend': 'Dead-End Proportion',
    'intersect_density': 'Intersection Density',
    'length_mean': 'Average Street Length',
    'vehicles_per_household': 'Vehicles Per Household',
}

def summarize_decades(gdf, cols_primary, type_primary, variables=variables):
    """
    Compute per-decade means and 95% confidence bounds for each variable.

    For every dummy column in `cols_primary`, the rows where that column equals 1
    are selected and any row missing a value for any of the variables is dropped.
    The mean and a t-distribution 95% interval around it are then computed for
    each variable.

    Parameters
    ----------
    gdf : DataFrame holding the dummy columns and the variable columns
    cols_primary : iterable of dummy column names, one per decade
    type_primary : str used to strip the `dummy_{type_primary}_prop_` prefix
        from each column name, leaving the decade label
    variables : dict whose keys are the variable column names to summarize

    Returns
    -------
    tuple of three DataFrames (means, lower bounds, upper bounds), each indexed
    by decade label with one column per variable
    """
    prefix = f'dummy_{type_primary}_prop_'
    var_names = list(variables.keys())

    means, lowers, uppers = {}, {}, {}
    for dummy_col in cols_primary:
        decade = dummy_col.replace(prefix, '')

        # tracts flagged for this decade, with complete data across all variables
        flagged = gdf[gdf[dummy_col] == 1]
        complete = flagged.dropna(subset=var_names)
        dof = len(complete) - 1

        decade_means, decade_lowers, decade_uppers = {}, {}, {}
        for name in variables:
            values = complete[name]
            center = values.mean()
            bounds = stats.t.interval(0.95, dof, loc=center, scale=stats.sem(values))
            decade_means[name] = center
            decade_lowers[name] = bounds[0]
            decade_uppers[name] = bounds[1]

        means[decade] = decade_means
        lowers[decade] = decade_lowers
        uppers[decade] = decade_uppers

    # dict-of-dicts yields decades as columns, so transpose to index by decade
    return pd.DataFrame(means).T, pd.DataFrame(lowers).T, pd.DataFrame(uppers).T

In [None]:
# plot primary vintage + confidence interval

# calculate decade/variable means and 95% confidence intervals once, up front: the
# summary already covers every variable, so recomputing it per subplot is wasted work
gdf_urban = gdf[gdf['is_urban']==1]
df_means, df_lowers, df_uppers = summarize_decades(gdf_urban, cols_primary, type_primary)
xticklabels = ['Pre-1940', '1940s', '1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']

fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 8))
for col, ax in zip(variables.keys(), axes.flat):
    
    # plot conf intervals, or don't: this only makes sense if you've sampled from a population but we're looking at the entire population of urban tracts
    # edgecolor is 'none' to suppress the band outline (an empty string is not a valid matplotlib color)
    ax.fill_between(df_means.index, df_uppers[col], df_lowers[col], color='#1f77b4', alpha=0.3, edgecolor='none')
    
    # plot means' lines
    df_means[col].plot(ax=ax, lw=2, marker='o', markerfacecolor='w', c='#1f77b4', markeredgewidth=1.5)
    
    # style the plot: pad the y-range by 20% of the spread of the means
    ax.set_xlim((-0.5, len(df_means.index) - 0.5))
    margin = (df_means[col].max() - df_means[col].min()) * 0.2
    ax.set_ylim((df_means[col].min() - margin, df_means[col].max() + margin))
    
    # set tick marks and labels
    ax.set_xticks(range(len(df_means.index)))
    ax.set_xticklabels(xticklabels, rotation=45, rotation_mode='anchor', ha='right')
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontname('Arial')
        tick.set_fontsize(11)
    
    # title the subplot and set the grid
    ax.set_title(variables[col], fontdict={'family':'Arial', 'size':14})
    ax.grid(True, ls=':', lw=0.5)
    
fig.subplots_adjust(wspace=1, hspace=0.5)
fig.tight_layout()
fig.savefig('images/decades-urban-primary-ci.png', bbox_inches='tight', dpi=300)
plt.close() 

In [None]:
# plot earliest, primary, and plurality together as line plots

# summarize the urban tracts once per vintage method: the summaries cover every
# variable, so they do not need to be recomputed for each subplot
gdf_urban = gdf[gdf['is_urban']==1]
means_plurality = summarize_decades(gdf_urban, cols_plurality, 'plurality')[0]
#means_cumulative = summarize_decades(gdf_urban, cols_cumulative, 'cumulative')[0]
means_earliest = summarize_decades(gdf_urban, cols_earliest, 'earliest')[0]
df_means, df_lowers, df_uppers = summarize_decades(gdf_urban, cols_primary, 'primary')
xticklabels = ['Pre-1940', '1940s', '1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']

fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 8))
for col, ax in zip(variables.keys(), axes.flat):
    
    # plot the points and line: plurality and earliest in gray, primary highlighted on top
    means_plurality[col].plot(ax=ax, lw=1.5, markersize=0, c='#999999', alpha=1, ls=':', dashes=(1, 1))
    
    #means_cumulative[col].plot(ax=ax, lw=1.5, markersize=0, c='#999999', alpha=1, ls=':')
    
    means_earliest[col].plot(ax=ax, lw=1.5, markersize=0, c='#999999', alpha=1, ls='--', dashes=(4, 2))
    
    df_means[col].plot(ax=ax, lw=2, marker='o', markerfacecolor='w', c='#1f77b4', markeredgewidth=2)
    
    # set the x and y limits (y-range follows the primary means, padded by 20% of their spread)
    ax.set_xlim((-0.5, len(df_means.index) - 0.5))
    margin = (df_means[col].max() - df_means[col].min()) * 0.2
    ax.set_ylim((df_means[col].min() - margin, df_means[col].max() + margin))
    
    # set tick marks and labels
    ax.set_xticks(range(len(df_means.index)))
    ax.set_xticklabels(xticklabels, rotation=45, rotation_mode='anchor', ha='right')
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontname('Arial')
        tick.set_fontsize(11)
    
    # title the subplot and set the grid
    ax.set_title(variables[col], fontdict={'family':'Arial', 'size':14})
    ax.grid(True, ls='-', lw=0.25)
    
fig.subplots_adjust(wspace=1, hspace=0.5)
fig.tight_layout()
fig.savefig('images/decades-urban-primary-earliest-plurality.png', bbox_inches='tight', dpi=300)
plt.close()

In [21]:
# plot primary vs HISDAC-US (ztrax) vs earliest method

# summarize the urban tracts once per vintage method: the summaries cover every
# variable, so they do not need to be recomputed for each subplot
gdf_urban = gdf[gdf['is_urban']==1]
means_primary = summarize_decades(gdf_urban, cols_primary, 'primary')[0]
means_earliest = summarize_decades(gdf_urban, cols_earliest, 'earliest')[0]
df_means, df_lowers, df_uppers = summarize_decades(gdf_urban, cols_ztrax, 'ztrax')
xticklabels = ['Pre-1940', '1940s', '1950s', '1960s', '1970s', '1980s', '1990s', '2000s', '2010s']

fig, axes = plt.subplots(nrows=2, ncols=4, figsize=(15, 8))
for col, ax in zip(variables.keys(), axes.flat):
    
    # plot the points and lines: primary solid, earliest dashed, ztrax dotted
    means_primary[col].plot(ax=ax, lw=1.5, markersize=0, c='#1f77b4', alpha=1, ls='-')
    
    means_earliest[col].plot(ax=ax, lw=1.5, markersize=0, c='#1f77b4', alpha=1, ls='--', dashes=(4, 2))
    
    df_means[col].plot(ax=ax, lw=2, markersize=0, c='#1f77b4', alpha=1, ls=':')
    
    # set the x and y limits
    # NOTE(review): the y-range is derived from the ztrax means only, so the primary and
    # earliest lines can be clipped where they fall outside it -- confirm this is intended
    ax.set_xlim((-0.5, len(df_means.index) - 0.5))
    margin = (df_means[col].max() - df_means[col].min()) * 0.2
    ax.set_ylim((df_means[col].min() - margin, df_means[col].max() + margin))
    
    # set tick marks and labels
    ax.set_xticks(range(len(df_means.index)))
    ax.set_xticklabels(xticklabels, rotation=45, rotation_mode='anchor', ha='right')
    for tick in ax.get_xticklabels() + ax.get_yticklabels():
        tick.set_fontname('Arial')
        tick.set_fontsize(13)
    
    # title the subplot and set the grid
    ax.set_title(variables[col], fontdict={'family':'Arial', 'size':15})
    ax.grid(True, ls='-', lw=0.25)
    
fig.subplots_adjust(wspace=1, hspace=0.5)
fig.tight_layout()
fig.savefig('images/decades-urban-primary-earliest-hisdac.png', bbox_inches='tight', dpi=300)
plt.close()

In [None]:
# display the variable name -> label mapping used for the trend subplots
variables 

In [None]:
# same variables as the trend plots, plus elevation IQR, without modifying `variables`
vars_to_use = {**variables, 'elevations_iqr': 'Elevations IQR'}

In [None]:
# tabulate decade means for urban tracts across the extended variable set
is_urban = gdf['is_urban'] == 1
df_means, df_lowers, df_uppers = summarize_decades(gdf[is_urban], cols_primary, 'primary', variables=vars_to_use)
df_means

## Map study sites

In [None]:
# load state boundaries and drop AK, HI, and PR
usa = gpd.read_file('data/cb_2017_us_state_20m')
mask = ~usa['STUSPS'].isin(['AK', 'HI', 'PR'])
usa = usa[mask]

# project to an Albers equal-area CRS defined by this proj string
crs_proj = '+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs '
usa_proj = usa.to_crs(crs_proj)

# bounding box of the projected states and its width:height ratio, used to size the map
xmin, ymin, xmax, ymax = usa_proj.unary_union.bounds
extent_width = xmax - xmin
extent_height = ymax - ymin
aspect_ratio = extent_width / extent_height

In [None]:
# FIPS codes of every entry in the lookup except Alaska and Hawaii
states = [fips for fips in fips_to_state
          if fips_to_state[fips]['abbreviation'] not in ['AK', 'HI']]

In [None]:
%%time
# project the gdf
gdf_proj = gdf.to_crs(crs_proj)

In [None]:
# keep only tracts whose state FIPS code is in `states`; match on STATEFP, the same
# FIPS column used earlier to filter gdf down to the 50 states + DC
mask = gdf_proj['STATEFP'].isin(states)
gdf_proj = gdf_proj[mask]
len(gdf_proj)

In [None]:
# keep just the response and geometry, ordered by response from low to high
plot_cols = [response, 'geometry']
gdf_plot = gdf_proj[plot_cols].sort_values(by=response).copy()

# convert response from (0.00 to 1.00) to (0 to 10) range
scaled = gdf_plot[response] * 10
gdf_plot['category'] = scaled.round(0).astype(int)

In [None]:
# there are 11 categories (0-10); get 9 colors then double-up the first and last to best match the colorbar legend and improve legibility
ramp = ox.get_colors(n=9, cmap='inferno', start=0.15, return_hex=True) #dark to light
ramp.insert(0, ramp[0])
ramp.append(ramp[-1])
colors = ramp

# look up each tract's color by its category number
gdf_plot['color'] = gdf_plot['category'].map(lambda category: colors[category])

In [None]:
# flatten the geometries together with their colors, then report both lengths
flat_geoms, flat_colors = flatten(gdf_plot['geometry'], gdf_plot['color'])
flat_geoms = gpd.GeoSeries(flat_geoms)
print(len(flat_geoms), len(flat_colors), sep='\n')

# NOTE(review): this section rebuilds flat_geoms/flat_colors with one color per tract
# (tracts sorted by response, colors drawn along the inferno ramp); if it runs after the
# category-based flattening it replaces those values -- confirm which coloring is intended
gdf_proj = gdf_proj.sort_values(response) #low to high
colors = ox.get_colors(len(gdf_proj), cmap='inferno', start=0.15) #dark to light
len(colors)

# flatten the geometries together with their colors, then report both lengths
flat_geoms, flat_colors = flatten(gdf_proj['geometry'], colors)
flat_geoms = gpd.GeoSeries(flat_geoms)
print(len(flat_geoms))
print(len(flat_colors))

In [None]:
%%time
width = 14
fig, ax = plt.subplots(figsize=(width, width/aspect_ratio), facecolor='k')

ax = flat_geoms.plot(ax=ax, facecolor=flat_colors, edgecolor=flat_colors, linewidth=0.2)
ax = usa_proj.plot(ax=ax, facecolor='none', edgecolor='w', linewidth=0.4)

ax.set_xlim((xmin, xmax))
ax.set_ylim((ymin, ymax))

ax.axis('off')
fig.tight_layout()
fig.savefig('images/grid_index-choropleth.png', facecolor=fig.get_facecolor(), edgecolor='none', bbox_inches='tight', dpi=300)
plt.close()

In [None]:
# render a 10 x 100 horizontal gradient (values 0-99) in the inferno colormap as a colorbar image
gradient = np.arange(100, dtype=float)
raster = np.tile(gradient, (10, 1))

fig, ax = plt.subplots()
ax.imshow(raster, cmap='inferno')
ax.axis('off')
fig.tight_layout()
fig.savefig('images/grid_index-colorbar.png', bbox_inches='tight', dpi=300)
plt.close() 

## Visualize KDE

In [None]:
# styling and smoothing settings for the KDE figure
fontname = 'Arial'
label_fontsize = 14
legend_fontsize = 6.25
bandwidth = 0.7 #passed to the KDE plot as bw_method

In [None]:
# label each tract with its state's abbreviation (None when the FIPS code is not in the lookup)
fips_to_abbrev = {fips: info['abbreviation'] for fips, info in fips_to_state.items()}
gdf['state_abbrev'] = gdf['STATEFP'].map(lambda fips: fips_to_abbrev.get(fips))

In [None]:
# group tracts by state abbreviation and get one color per group
gdf_by_state = gdf.sort_values(by='state_abbrev')
groups = gdf_by_state.groupby('state_abbrev')
colors = ox.get_colors(groups.ngroups, cmap='hsv_r', start=0.2)

In [None]:
# state abbreviations ordered by their median response, lowest first
state_medians = groups[response].median()
states = state_medians.sort_values().index

In [None]:
# one KDE curve per state, drawn from the highest median response to the lowest
fig, ax = plt.subplots(figsize=(7, 7))
kde_style = dict(linewidth=1.5, alpha=0.6, ls='-', bw_method=bandwidth)
for state, c in zip(states[::-1], colors[::-1]):
    state_tracts = groups.get_group(state)
    ax = state_tracts[response].plot.kde(ax=ax, label=state, color=c, **kde_style)

# fix the axis ranges and label the axes
ax.set_ylim(bottom=0, top=3.5)
ax.set_xlim(left=-0.19, right=1.19)
label_font = dict(fontsize=label_fontsize, fontname=fontname)
ax.set_xlabel('Grid Index', **label_font)
ax.set_ylabel('Probability Density', **label_font)

# put the legend outside the axes, to the right
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), prop={'size':legend_fontsize, 'family':fontname})
fig.tight_layout()
fig.savefig('images/kde-grid_index.png', bbox_inches='tight', dpi=600)
plt.close()