In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

# --- configuration -----------------------------------------------------------
# input files, given relative to the notebook's working directory
indicators_path = 'data/tracts_indicators_grades_eras_index.csv'
tracts_path = 'data/tracts_shapefile'

# coordinate reference system (EPSG:4326), written in the proj "init" dict form
# NOTE(review): newer pyproj/geopandas releases deprecate the {'init': ...} form
# in favor of a plain 'EPSG:4326' string -- confirm against the installed
# versions (and any downstream use of `crs`) before changing it.
crs = {'init': 'epsg:4326'}

In [2]:
# Human-readable label for each decade code that appears in the *_decade
# columns; used to relabel those codes when tables are displayed or exported.
decade_names = {
    'prop_1939_earlier': 'Pre-1940',
    'prop_1940_49': '1940s',
    'prop_1950_59': '1950s',
    'prop_1960_69': '1960s',
    'prop_1970_79': '1970s',
    'prop_1980_89': '1980s',
    'prop_1990_99': '1990s',
    'prop_2000_09': '2000s',
    'prop_2010_later': '2010s',
}

In [3]:
# Load the indicators table. geoid is forced to str so identifiers are kept as
# text (the tract ids used later in this notebook start with a leading zero).
indicators = pd.read_csv(
    indicators_path,
    dtype={'geoid': str},
)
indicators.shape

(72663, 180)

In [4]:
# Load the tracts layer found at tracts_path into a GeoDataFrame and report
# its (rows, columns) as the cell output.
tracts = gpd.read_file(
    tracts_path,
)
tracts.shape

(74133, 13)

In [6]:
# Keep only urban tracts whose primary_decade is 'prop_2010_later' (the 2010s).
# is_urban is compared with True explicitly, so a 0/1 integer flag matches too.
mask = (indicators['is_urban'] == True) & (indicators['primary_decade'] == 'prop_2010_later')
subset = indicators[mask].set_index('geoid')

# Build a "lat,lng" string per tract from the tracts table's INTPTLAT/INTPTLON
# columns. Each column is formatted with Series.map and the pieces are
# concatenated, rather than calling DataFrame.apply(axis=1) row by row over
# every tract: same strings, far less per-row overhead. The assignment aligns
# on the index (GEOID -> geoid), so a subset tract absent from `tracts` gets NaN.
points = tracts.set_index('GEOID')[['INTPTLAT', 'INTPTLON']].astype(float)
subset['latlng'] = points['INTPTLAT'].map('{}'.format) + ',' + points['INTPTLON'].map('{}'.format)

# columns to inspect, in display order (reindex leaves a missing column as NaN
# instead of raising)
cols = ['latlng', 'state_abbrev', 'grid_index', 'is_urban', 'ztrax_decade', 'prim_ztrax_decade',
        'primary_decade', 'earliest_decade', 'plurality_decade', 'cumulative_decade',
        'year_min', 'year_median', 'year_mean', 'year_std']
subset = subset.reindex(columns=cols).copy()
subset.shape

(213, 14)

In [7]:
# Show the 25 rows of `subset` with the largest grid_index values.
(subset
 .sort_values(by='grid_index', ascending=False)
 .head(n=25))

Unnamed: 0_level_0,latlng,state_abbrev,grid_index,is_urban,ztrax_decade,prim_ztrax_decade,primary_decade,earliest_decade,plurality_decade,cumulative_decade,year_min,year_median,year_mean,year_std
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
48245006100,"29.8761511,-93.9405661",TX,0.937139,1,prop_1950_59,prop_1950_59,prop_2010_later,prop_1939_earlier,prop_2010_later,prop_1970_79,1940.0,1957.5,1960.277778,20.470368
41039003800,"44.0449004,-123.0851827",OR,0.91289,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2010_later,prop_2010_later,prop_1980_89,1865.0,1905.0,1905.555556,17.730738
27053007700,"44.950155,-93.2874611",MN,0.903524,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2000_09,prop_2010_later,prop_2000_09,1885.0,1900.0,1906.5,26.0395
48245006400,"29.9034097,-93.924184",TX,0.875924,1,prop_1950_59,prop_1950_59,prop_2010_later,prop_2010_later,prop_2010_later,prop_1960_69,1940.0,1955.0,1955.526316,11.290942
42101037700,"39.9824381,-75.1506931",PA,0.848183,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2010_later,prop_2010_later,prop_1990_99,1875.0,1920.0,1919.642857,26.708129
4013113100,"33.4550381,-112.073874",AZ,0.840951,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2010_later,prop_2010_later,prop_1970_79,1890.0,1915.0,1923.4,26.446487
27053103900,"44.982613,-93.2350036",MN,0.833415,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2010_later,prop_2010_later,prop_1990_99,1880.0,1890.0,1902.0,27.748874
22071013800,"29.9961591,-90.0800463",LA,0.833405,1,,prop_2010_later,prop_2010_later,prop_2010_later,prop_2010_later,prop_2000_09,,,,
48167724600,"29.2976535,-94.8080731",TX,0.833012,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_1970_79,prop_2010_later,prop_1970_79,1885.0,1935.0,1932.5,20.044593
51510201600,"38.8115069,-77.0516818",VA,0.825111,1,prop_1939_earlier,prop_1939_earlier,prop_2010_later,prop_2010_later,prop_2010_later,prop_1990_99,1810.0,1905.0,1908.0,42.932015


## First, validation of estimated vintage

In [8]:
# For each decade-assignment column, count how many rows carry each decade
# code; the result has one row per code and one column per assignment column.
decade_cols = ['ztrax_decade', 'prim_ztrax_decade', 'primary_decade',
               'earliest_decade', 'plurality_decade', 'cumulative_decade']
indicators[decade_cols].apply(pd.Series.value_counts)

Unnamed: 0,ztrax_decade,prim_ztrax_decade,primary_decade,earliest_decade,plurality_decade,cumulative_decade
prop_1939_earlier,20528,27826,21151,19552,18096,5751
prop_1940_49,4976,4411,1550,1636,1139,3913
prop_1950_59,8846,9058,9232,8510,8546,9646
prop_1960_69,9269,8224,5645,5730,5356,10479
prop_1970_79,10859,11625,12368,12689,12418,16311
prop_1980_89,8807,7169,6951,6986,7377,13813
prop_1990_99,3713,3164,7800,8242,9115,8624
prop_2000_09,1412,1094,7160,4715,9623,3596
prop_2010_later,208,34,368,159,555,92


In [12]:
# Hand-picked tracts for spot-checking the decade assignments: geoid -> short
# place label. The trailing comment on each entry is the decade noted by the
# author for that place (presumably its known development era).
tract_ids = {
    '04013810700': 'chandler',     # 1990s
    '06001423400': 'berkeley',     # 1920s
    '36059409200': 'levittown',    # 1940s
    '25025020301': 'bos wes end',  # 1960s
    '25025020101': 'beacon hill',  # 1800s
    '32003005824': 'vegas sum w',  # 2000s vegas summerlin west
    '06001403100': 'oakland dt',   # 1940s
    '06059052526': 'irvine',       # 1970s
    '06067009619': 'laguna west',  # 1990s
    '41051009302': 'east pdx',     # 1950s
    '41051001102': 'ladds pdx',    # 1910s
    '06037214503': 'park labrea',  # 1940s
    '06075033204': 'parkmerced',   # 1940s
}

# An alternative set of spot-check tracts tried previously:
#   34007605602 (1930s), 39035187105 (1940s), 27003050232 (1990s)

# every column whose name contains '_decade'
cols = [name for name in indicators.columns if '_decade' in name]

# rows for the hand-picked tracts only
tract = indicators[indicators['geoid'].isin(list(tract_ids))]

# relabel rows by place name, drop the two columns not compared here, and
# show decade codes as readable labels
(tract.set_index('geoid')[cols]
      .rename(index=tract_ids)
      .drop(columns=['cumulative_decade', 'majority_decade'])
      .replace(decade_names))

Unnamed: 0_level_0,ztrax_decade,primary_decade,prim_ztrax_decade,earliest_decade,plurality_decade
geoid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
chandler,1990s,1990s,1990s,1980s,1990s
oakland dt,Pre-1940,2000s,Pre-1940,Pre-1940,2000s
berkeley,Pre-1940,Pre-1940,Pre-1940,Pre-1940,Pre-1940
park labrea,1940s,1940s,1940s,1940s,1940s
irvine,1970s,1970s,1970s,1970s,1970s
laguna west,2000s,1990s,1990s,1990s,1990s
parkmerced,1940s,1940s,1940s,1940s,1940s
beacon hill,Pre-1940,Pre-1940,Pre-1940,Pre-1940,Pre-1940
bos wes end,Pre-1940,1960s,Pre-1940,1960s,1960s
vegas sum w,2000s,2000s,2000s,2000s,2000s


## Now make descriptive tables

In [13]:
# Variables reported in the descriptive tables. primary_decade is the grouping
# key used for the per-decade table; the rest are the summarized variables.
cols = [
    'primary_decade',
    'grid_index', 'orientation_order', 'straightness', 'prop_4way',
    'prop_deadend', 'k_avg', 'intersect_density', 'length_mean',
    'vehicles_per_household', 'pop_density', 'prop_single_fam',
    'med_rooms_per_home', 'mean_household_size', 'med_hh_income',
    'mean_commute_time', 'elevations_iqr', 'grade_mean',
]

# Turn +/-inf into NaN so the summary statistics below skip those entries
# rather than being distorted by them. Re-running this cell is harmless.
indicators[cols] = indicators[cols].replace(to_replace=[-np.inf, np.inf], value=np.nan)

In [18]:
# Table 3: describe() summary of the table variables, rounded to 3 decimals
# and transposed so that each variable is a row and each statistic a column.
summary_stats = indicators[cols].describe()
table3 = summary_stats.round(3).transpose()
table3.to_csv('data/table3.csv', index=True, encoding='utf-8')
table3

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
grid_index,72659.0,0.421,0.209,0.0,0.256,0.396,0.561,1.0
orientation_order,72659.0,0.484,0.316,0.002,0.186,0.466,0.772,1.0
straightness,72663.0,0.936,0.046,0.0,0.912,0.94,0.969,1.0
prop_4way,72663.0,0.21,0.172,0.0,0.092,0.157,0.272,1.0
prop_deadend,72663.0,0.203,0.114,0.0,0.116,0.205,0.289,1.0
k_avg,72663.0,2.528,0.296,0.2,2.332,2.513,2.719,3.686
intersect_density,72660.0,34.666,33.318,0.0,6.088,28.296,52.189,370.754
length_mean,72663.0,240.322,224.423,13.773,130.482,158.987,248.747,20407.2
vehicles_per_household,71270.0,1.785,0.443,0.027,1.538,1.834,2.091,4.333
pop_density,72660.0,2.087,4.594,0.0,0.125,0.873,2.103,100.631


In [19]:
# Table 4: mean of every table variable within each primary_decade group,
# rounded to 3 decimals.
groups = indicators[cols].groupby('primary_decade')
vars_decades = groups.mean().round(3)

# put the number of rows in each group in the first column
vars_decades.insert(loc=0, column='observations', value=groups.size())

# relabel decade codes, then transpose: variables as rows, decades as columns
table4 = vars_decades.rename(index=decade_names).transpose()
table4.to_csv('data/table4.csv', index=True, encoding='utf-8')
table4

primary_decade,Pre-1940,1940s,1950s,1960s,1970s,1980s,1990s,2000s,2010s
observations,21151.0,1550.0,9232.0,5645.0,12368.0,6951.0,7800.0,7160.0,368.0
grid_index,0.539,0.562,0.483,0.41,0.357,0.317,0.286,0.332,0.414
orientation_order,0.617,0.661,0.585,0.493,0.421,0.344,0.299,0.359,0.448
straightness,0.961,0.96,0.948,0.934,0.92,0.911,0.913,0.919,0.936
prop_4way,0.315,0.321,0.241,0.185,0.149,0.134,0.116,0.144,0.213
prop_deadend,0.145,0.125,0.158,0.195,0.245,0.259,0.279,0.25,0.18
k_avg,2.657,2.688,2.622,2.508,2.436,2.411,2.384,2.468,2.585
intersect_density,48.037,52.482,42.806,36.808,25.663,26.698,18.427,21.374,33.922
length_mean,254.531,165.656,170.966,188.238,259.383,232.266,294.315,249.168,204.406
vehicles_per_household,1.578,1.634,1.776,1.766,1.859,1.89,2.018,1.973,1.732


In [20]:
# Table 5: pairwise correlation matrix of the table variables, rounded to 3
# decimals. primary_decade (the grouping key) is left out of the matrix.
table_vars = indicators[cols].drop(columns=['primary_decade'])
table5 = table_vars.corr().round(3)
table5.to_csv('data/table5.csv', index=True, encoding='utf-8')
table5

Unnamed: 0,grid_index,orientation_order,straightness,prop_4way,prop_deadend,k_avg,intersect_density,length_mean,vehicles_per_household,pop_density,prop_single_fam,med_rooms_per_home,mean_household_size,med_hh_income,mean_commute_time,elevations_iqr,grade_mean
grid_index,1.0,0.899,0.664,0.9,-0.735,0.627,0.571,-0.266,-0.496,0.442,-0.323,-0.363,0.017,-0.241,-0.047,-0.305,-0.377
orientation_order,0.899,1.0,0.577,0.67,-0.597,0.432,0.464,-0.208,-0.396,0.361,-0.26,-0.344,0.06,-0.255,-0.069,-0.299,-0.416
straightness,0.664,0.577,1.0,0.583,-0.51,0.421,0.413,-0.178,-0.336,0.296,-0.17,-0.201,0.028,-0.186,-0.009,-0.336,-0.275
prop_4way,0.9,0.67,0.583,1.0,-0.708,0.632,0.571,-0.247,-0.518,0.502,-0.35,-0.33,-0.017,-0.177,0.036,-0.225,-0.251
prop_deadend,-0.735,-0.597,-0.51,-0.708,1.0,-0.728,-0.621,0.345,0.555,-0.402,0.37,0.33,0.04,0.176,0.045,0.32,0.309
k_avg,0.627,0.432,0.421,0.632,-0.728,1.0,0.405,-0.208,-0.28,0.048,-0.061,-0.128,-0.055,-0.168,-0.195,-0.224,-0.29
intersect_density,0.571,0.464,0.413,0.571,-0.621,0.405,1.0,-0.516,-0.584,0.586,-0.47,-0.326,0.021,-0.033,0.159,-0.26,-0.12
length_mean,-0.266,-0.208,-0.178,-0.247,0.345,-0.208,-0.516,1.0,0.398,-0.238,0.283,0.127,-0.04,-0.091,-0.016,0.4,0.027
vehicles_per_household,-0.496,-0.396,-0.336,-0.518,0.555,-0.28,-0.584,0.398,1.0,-0.538,0.731,0.648,0.344,0.427,0.007,0.218,0.106
pop_density,0.442,0.361,0.296,0.502,-0.402,0.048,0.586,-0.238,-0.538,1.0,-0.496,-0.359,0.068,-0.024,0.314,-0.134,-0.074
