# Selected results
This notebook reports the numbers and statistics that are included in the article.

Some of the cells are background or exploratory analysis.

The coefficients use the beta model.

In [None]:
import pandas as pd
import geopandas as gpd
import seaborn as sns
import numpy as np

In [None]:
# Directory where the Stan coefficients and other estimates are saved.
outputpath = '../stan_output/'

# Observed city-level and country-level data; the country table is indexed by country.
data_cities = pd.read_csv('data/data_23.csv')
data_countries = pd.read_csv('data/country_23.csv', index_col='country')

# City-level coefficients: keep only the median ("50%") column of every estimate.
beta_estimates = pd.read_csv(outputpath + 'beta_estimates_beta.csv')
median_columns = [name for name in beta_estimates.columns if "50%" in name]
city_level_variables = beta_estimates[median_columns].copy()

# Strip the last four characters of each column name (presumably the "_50%"
# quantile suffix -- TODO confirm against the CSV header).
city_level_variables.columns = [name[:-4] for name in median_columns]

# Swap each numbered beta prefix for the predictor it stands for. The
# replacements are applied one after another, in this order.
prefix_labels = {
    'beta1_': 'intercept_',
    'beta2_': 'sndi_',
    'beta3_': 'density_',
    'beta4_': 'precipitation_',
    'beta5_': 'min_temp_',
    'beta6_': 'bikelanes_',
    'beta7_': 'slope_',
    'beta8_': 'includes_inboundoutbound_',
    'beta9_': 'rail_in_city_',
    'beta10_': 'max_temp_',
    'beta11_': 'population_',
    'beta12_': 'min_temp2_',
}
renamed_columns = city_level_variables.columns
for prefix, label in prefix_labels.items():
    renamed_columns = renamed_columns.str.replace(prefix, label)
city_level_variables.columns = renamed_columns

# Add the country name. The join is by row position, so this assumes the
# estimate rows are in the same order as the rows of the country table --
# TODO confirm.
country_names = data_countries.reset_index()[['country']]
city_level_variables = city_level_variables.join(country_names)

# Country-level estimates: the country identifier plus the median ("50%") columns.
gamma_estimates = pd.read_csv(outputpath + 'gamma_estimates_beta.csv')
gamma_columns = ['country_var'] + [name for name in gamma_estimates.columns if "50%" in name]
country_level_estimates = gamma_estimates[gamma_columns].copy()

In [None]:
# Row counts of the city and country tables.
for label, table in (('N cities: ', data_cities), ('N countries: ', data_countries)):
    print(label, table.shape[0])

In [None]:
# Load urban population so we can assess the coverage of our dataset
urbanpop = pd.read_csv('data/wdi_urbanpop.csv')

# Rows whose 2023 value is the string '..' cannot be converted to float
# (presumably the WDI marker for missing data), so drop them first.
# .copy() makes the filtered frame independent of the original, so adding
# columns below does not raise SettingWithCopyWarning.
urbanpop = urbanpop[urbanpop['2023 [YR2023]'] != '..'].copy()
urbanpop['pop2023'] = urbanpop['2023 [YR2023]'].astype(float)

# Taiwan is handled separately from the WDI table: total population times urban share.
taiwanpop = 23588613 * .801 # CIA world factbook

# countries in Google dataset, apart from Taiwan
# The extra names are presumably the WDI spellings of countries that are
# spelled differently in the city data -- verify against 'Country Name'.
google_countries = list(data_cities.country.unique()) + [
    'Egypt, Arab Rep.', 'Kyrgyz Republic', 'Slovak Republic', 'Syrian Arab Republic',
    'Bahamas, The', 'Turkiye', 'Venezuela, RB', 'Viet Nam', 'Yemen, Rep.']

# Names in the list above with no match in the WDI table. This was previously
# a bare expression in the middle of the cell, so its value was silently
# discarded; it is now kept in a variable that can be inspected.
unmatched_countries = [cc for cc in google_countries if cc not in urbanpop['Country Name'].values]

urbanpop['in_google'] = urbanpop['Country Name'].isin(google_countries)

<b>Descriptive</b>

In [None]:
# --- Coverage of the city sample relative to the WDI urban population ---
city_population = data_cities.population.sum()
wdi_urban_population = urbanpop.pop2023.sum()

print ('Population of all cities (bn)', round(city_population/1e9,3))
print('2023 urban population from WDI (bn)', round(wdi_urban_population/1e9,3))
# Author's reference figure: urban population from WDI was 4.52 billion in 2022
# (the number printed above uses the 2023 column).
# NOTE(review): if the WDI extract also contains aggregate rows (world,
# regions, income groups), this sum double counts -- confirm the CSV holds
# countries only.
print('\t{:.1f}% of urban population'.format(city_population/wdi_urban_population*100))
print('\t{:.1f}% of urban population in Google countries'.format(
    city_population / (taiwanpop + urbanpop[urbanpop.in_google].pop2023.sum())*100))

# --- Mode shares, pooled over all cities ---
total_trips = data_cities.trips_total_touse.sum()
total_km = data_cities.km_total.sum()

# Scale to percent first and format to one decimal. Rounding the fraction and
# then multiplying by 100 can print float artifacts such as 28.900000000000002.
print('Percent of trips by walking: {:.1f}'.format(
    100*data_cities.trips_on_foot_touse.sum()/total_trips))
print('Percent of trips by biking: {:.1f}'.format(
    100*data_cities.trips_cycling_touse.sum()/total_trips))
print('Percent of km by walking: {:.1f}'.format(
    100*data_cities.km_on_foot.sum()/total_km))
print('Percent of km by biking: {:.1f}'.format(
    100*data_cities.km_cycling.sum()/total_km))
print('Percent of km by auto and MC: {:.1f}'.format(
    100*(data_cities.km_motorcycle.sum()+data_cities.km_automobile.sum())/total_km))
print('Percent of km by PT: {:.1f}'.format(
    100*data_cities.km_transit.sum()/total_km))

# --- Walking relative to cycling ---
print('Ratio of walk to bike trips', round(data_cities.trips_on_foot_touse.sum()/data_cities.trips_cycling_touse.sum(),1))
print('Ratio of walk to bike km', round(data_cities.km_on_foot.sum()/data_cities.km_cycling.sum(),1))


In [None]:
# Ten countries with the largest cycling share of kilometres.
print('km share at country level (including transit)')
cycling_km_share = data_countries[['km_share_cycling']]
cycling_km_share.sort_values('km_share_cycling', ascending=False).head(10)

In [None]:
# Fifteen countries with the largest walking share of kilometres.
print('km share at country level (including transit)')
walking_km_share = data_countries[['km_share_on_foot']]
walking_km_share.sort_values('km_share_on_foot', ascending=False).head(15)

In [None]:
# Dutch cities ranked by cycling share of kilometres, highest first.
print('km share (including transit)')
is_dutch = data_cities.country == 'Netherlands'
(
    data_cities
    .loc[is_dutch, ['city', 'population', 'km_share_cycling']]
    .sort_values('km_share_cycling', ascending=False)
)

In [None]:
# Dutch cities ranked by cycling share of trips, highest first.
print('Share of trips (including transit)')
is_dutch = data_cities.country == 'Netherlands'
(
    data_cities
    .loc[is_dutch, ['city', 'population', 'cycling_share_including_transit']]
    .sort_values('cycling_share_including_transit', ascending=False)
)

In [None]:
# Japanese cities: median cycling share of kilometres, then the full ranking.
japan_cities = data_cities[data_cities.country == 'Japan']
print('median km share:', japan_cities['km_share_cycling'].median())
japan_ranked = japan_cities.sort_values('km_share_cycling', ascending=False)
japan_ranked[['city', 'state', 'km_share_cycling']]

<b>Model Results - National</b>

In [None]:
# Display the country-level table loaded earlier: `country_var` plus the "50%" columns.
country_level_estimates

In [None]:
# Work out what a one-standard-deviation change in gas prices means in raw
# units: for consecutive rows, divide the difference in the raw price by the
# difference in the standardized price, then average that ratio.
# Note that shift(1) pulls the value from the *previous* row, despite the
# "next_" prefix of the helper columns.
data_countries = data_countries.assign(
    next_gas=lambda frame: frame['gasoline_2018'].shift(1),
    next_gas_standard=lambda frame: frame['gasoline_standard'].shift(1),
    gas_change=lambda frame: frame['gasoline_2018'] - frame['next_gas'],
    gas_standard_change=lambda frame: frame['gasoline_standard'] - frame['next_gas_standard'],
    estimate=lambda frame: frame['gas_change'] / frame['gas_standard_change'],
)

data_countries[['estimate']].mean()

<b>Density</b>

In [None]:
# Example cities to illustrate what a one-standard-deviation difference in density looks like.
density_example_cities = ['Seattle', 'San Francisco', 'Taipei']
(
    data_cities
    .loc[data_cities.city.isin(density_example_cities), ['city', 'country', 'density_standard']]
    .round(2)
    .sort_values('density_standard')
)

In [None]:
# Effect size: median of the density coefficients, rounded to three decimals
# and then scaled by 100.
print('Effect of density (1 stddev change)')
density_medians = city_level_variables[['density_walk', 'density_bike']].median()
density_medians.round(3) * 100

In [None]:
# Walking density coefficient for every row, largest first.
density_walk_by_country = city_level_variables[['country', 'density_walk']]
density_walk_by_country.sort_values('density_walk', ascending=False)

In [None]:
# The twenty largest walking density coefficients.
density_walk_by_country = city_level_variables[['country', 'density_walk']]
density_walk_by_country.sort_values('density_walk', ascending=False).head(20)

In [None]:
# Walking density coefficient for the two countries singled out for comparison.
comparison_countries = ['Morocco', 'Germany']
city_level_variables.loc[
    city_level_variables.country.isin(comparison_countries),
    ['country', 'density_walk'],
]

<b>Bike lanes</b>

In [None]:
# Effect size: median of the bike-lane coefficients, rounded to three decimals
# and then scaled by 100.
bikelane_medians = city_level_variables[['bikelanes_bike', 'bikelanes_walk']].median()
bikelane_medians.round(3) * 100

In [None]:
# Example cities to illustrate what a one-standard-deviation difference in bike lanes looks like.
bikelane_example_cities = ['Los Angeles', 'San Francisco']
(
    data_cities
    .loc[data_cities.city.isin(bikelane_example_cities), ['city', 'country', 'bikelanes_standard']]
    .round(2)
    .sort_values('bikelanes_standard')
)

In [None]:
# Translate the bike-lane coefficient into extra bike travel per km of bike lane.

# Travel (excluding transit) per km of road in each city.
data_cities['travel_per_road_km'] = data_cities['km_no_transit'] / data_cities['roads_km']

# Km of bike lane each city would need for a one-standard-deviation increase in
# bike lanes per road km. np.std on a Series uses ddof=0 (population standard
# deviation), which differs from Series.std(); kept as in the original.
data_cities['bikelanes_for_1sd_increase'] = data_cities['roads_km'] * np.std(data_cities.bikelane_per_road_km)

# Attach each country's bike-lane coefficient to its cities.
# - Dropping any existing 'bikelanes_bike' column first makes the cell safe to
#   re-run; the original join raised on the overlapping column the second time.
# - validate='many_to_one' fails loudly if a country appears more than once in
#   the coefficient table, instead of silently duplicating city rows.
bikelane_coefficients = city_level_variables[['country', 'bikelanes_bike']]
data_cities = (
    data_cities
    .drop(columns='bikelanes_bike', errors='ignore')
    .merge(bikelane_coefficients, on='country', how='left', validate='many_to_one')
)

# Change in bike travel: non-transit km scaled by the country's coefficient.
data_cities['biketravel_increase'] = data_cities['km_no_transit'] * data_cities.bikelanes_bike

# Change in bike travel per km of additional bike lane.
data_cities['biking_per_bikelane_km'] = data_cities['biketravel_increase'] / data_cities['bikelanes_for_1sd_increase']

In [None]:
# Median across cities of the per-km bike-lane effect computed in the previous cell.
print('Effect of 1km bike lanes on bike km per year')
per_km_effect = data_cities['biking_per_bikelane_km']
per_km_effect.median()

<b>Appendix</b>

In [None]:
# What fraction of cities include inbound and outbound trips?
# First the count of cities per flag value, then the mean of the flag.
inbound_outbound_counts = data_cities.groupby('includes_inboundoutbound').size()
print(inbound_outbound_counts)
inbound_outbound_share = data_cities['includes_inboundoutbound'].mean()
print(format(inbound_outbound_share, '.3f'))