In [1]:
import pandas as pd
import geopandas as gp
import folium
import numpy as np

In [20]:
# ACS year to map; used to build the input CSV file name and the output map folder.
year = 2018

In [21]:
# Load the ACS table for the selected year (one CSV per year, named ACS_<year>.csv).
acs_csv_path = 'C:/Workspace/TNC-Demand-Model/Outputs/ACS_' + str(year) + '.csv'
acs = pd.read_csv(acs_csv_path)

In [22]:
# Chicago tract polygons; the `geoid10` attribute of each feature is the key
# the choropleth layers are joined on further down.
tracts_shp_path = 'C:/Workspace/TNC-Demand-Model/Inputs/Census Shapefiles/Chicago Tracts/geo_export_558aad9f-98d8-4dd5-a6b1-c1730155d596.shp'
tracts = gp.read_file(tracts_shp_path)

In [23]:
# Inspect the tract identifiers (string dtype) that the ACS table must match.
tracts['geoid10']

0      17031842400
1      17031840300
2      17031841100
3      17031841200
4      17031839000
          ...     
796    17031070400
797    17031070500
798    17031130300
799    17031292200
800    17031630900
Name: geoid10, Length: 801, dtype: object

In [24]:
# The shapefile identifiers are strings, so store the ACS tract id as a string too.
acs['CENSUS_TRACT'] = acs['CENSUS_TRACT'].astype(str)

In [25]:
# Attach each tract's land area (in square miles) to the ACS table.
# presumably ALAND is in square meters, which is what this factor converts from — TODO confirm against the TIGER/Line docs
SQ_METERS_TO_SQ_MILES = 3.86102e-7

geo_area = gp.read_file('C:/Workspace/TNC-Demand-Model/Inputs/Census Shapefiles/Tracts/tl_2019_17_tract/tl_2019_17_tract.shp')
geo_area['SQUARE_MILES_LAND'] = geo_area['ALAND'] * SQ_METERS_TO_SQ_MILES
geo_area['GEOID'] = geo_area['GEOID'].astype(str)

# Left join keeps every ACS row; rows without a matching GEOID get NaN land area.
tract_land_area = geo_area[['GEOID','SQUARE_MILES_LAND']]
acs = acs.merge(
    tract_land_area,
    how = 'left',
    left_on = 'CENSUS_TRACT',
    right_on = 'GEOID',
)

In [26]:
# Population and household densities per square mile of land.
land_sq_miles = acs['SQUARE_MILES_LAND']
acs['TOTAL_POP_PER_SQ_MILE'] = acs['TOTAL_POP'].div(land_sq_miles)
acs['TOTAL_HHLDS_PER_SQ_MILE'] = acs['TOTAL_HHLDS'].div(land_sq_miles)

In [27]:
# Columns to express as a share of total population (stored as <name>_PER_POP).
# NOTE(review): the printed summary for TOTAL_HHLDS_200KP shows fractional
# values up to 62.7, which looks like a percentage rather than a count —
# confirm that dividing it by TOTAL_POP is intended.
normalize_vars = ['AGE_25_34', 'TOTAL_POP_25P_BACHELORS_HIGHER','TOTAL_HHLDS_200KP' ]

for var in normalize_vars:
    # Replace the '-' placeholder with NaN and the '250,000+' label with a
    # parseable number, then cast the column to float.
    acs[var] = (
        acs[var]
        .replace('-',np.nan)
        .replace('250,000+','250000')
        .astype(float)
    )
    acs[var + '_PER_POP'] = acs[var].div(acs['TOTAL_POP'])

In [28]:
# Read the points file and wrap it in a GeoDataFrame. No geometry column is
# set here; the map loop below reads the GEOID, X and Y columns directly.
centroids = gp.GeoDataFrame(
    pd.read_csv('C:/Workspace/TNC-Demand-Model/otp/points.csv')
)

In [29]:
# Share of households with zero vehicles.
acs['HHLDS_0_VEH_PER_HHLD'] = acs['HHLDS_0_VEH'].div(acs['TOTAL_HHLDS'])

In [30]:
# Clean the three $100K+ income-bracket columns ('-' placeholder -> NaN, then
# float) and combine them into one measure per household.
for income_col in ('TOTAL_HHLDS_100K_150K', 'TOTAL_HHLDS_150K_200K', 'TOTAL_HHLDS_200KP'):
    acs[income_col] = acs[income_col].replace('-',np.nan).astype(float)

# NOTE(review): the printed summaries for these bracket columns show
# fractional values well below 100, which looks like percentages rather than
# household counts — confirm that dividing their sum by TOTAL_HHLDS is intended.
hhlds_100kp = (
    acs['TOTAL_HHLDS_100K_150K']
    + acs['TOTAL_HHLDS_150K_200K']
    + acs['TOTAL_HHLDS_200KP']
)
acs['HHLDS_100KP_PER_HHLD'] = hhlds_100kp / acs['TOTAL_HHLDS']

In [31]:
# Every column except the saved CSV index and the two tract identifiers gets a map.
non_variable_cols = ['Unnamed: 0', 'CENSUS_TRACT', 'GEOID']
variables = acs.columns.drop(non_variable_cols)

In [32]:
# Cast the $100K+ income-bracket columns to float. The cell that builds
# HHLDS_100KP_PER_HHLD performs the same casts, so on a top-to-bottom run
# this leaves the values unchanged.
for income_col in ('TOTAL_HHLDS_100K_150K', 'TOTAL_HHLDS_150K_200K', 'TOTAL_HHLDS_200KP'):
    acs[income_col] = acs[income_col].astype(float)


In [33]:
# List the tracts whose land area exceeds 10 square miles.
acs.loc[acs['SQUARE_MILES_LAND'] > 10, 'GEOID'].astype(str)

780     17031804201
1081    17031823801
Name: GEOID, dtype: object

In [34]:
# Build one choropleth map per ACS variable and save each as a standalone
# HTML file in the per-year "ACS Maps" folder.

# The centroid markers are the same on every map, so extract their
# coordinates and popup labels once, as plain Python scalars, instead of
# re-filtering `centroids` for every marker on every map.
centroid_markers = [
    ([float(lat), float(lon)], str(int(geoid)))
    for geoid, lat, lon in zip(centroids['GEOID'], centroids['Y'], centroids['X'])
]

for var in variables:
    # Replace the '-' placeholder with NaN and the '250,000+' label with a
    # parseable number so the column can be cast to float.
    acs[var] = acs[var].replace('-',np.nan)
    acs[var] = acs[var].replace('250,000+','250000')

    acs[var] = acs[var].astype(float)
    print('Working on variable ' + var)
    # NOTE(review): recent folium releases no longer bundle the Stamen tile
    # sets — confirm this tiles name still resolves in the target environment.
    m = folium.Map([41.8781, -87.6298], zoom_start=11, tiles = 'stamen toner')
    print(acs[var].max())
    print(acs[var].min())
    print(acs[var].mean())

    # Colour breaks at the minimum, median, 75th and 98th percentiles and the
    # maximum, ignoring NaN.
    bins = np.nanquantile(acs[var], [0, 0.5, 0.75,0.98,1])
    print(bins)

    # Add the colour layer. folium.Choropleth replaces the deprecated
    # Map.choropleth method, which later folium releases no longer provide.
    folium.Choropleth(
        geo_data=tracts,
        name= var,
        data=acs,
        columns = ['CENSUS_TRACT', var],
        key_on='feature.properties.geoid10',
        fill_color='BuGn',
        fill_opacity=0.6,
        line_opacity=0.2,
        bins = bins,
        legend_name= var + ' Totals',
        highlight = True
    ).add_to(m)

    # Optional overlay (hidden by default) with one small grey dot per centroid.
    # A new FeatureGroup is created for each map.
    feature_group = folium.FeatureGroup(name='Census Tract Centroids', show = False)

    for location, label in centroid_markers:
        folium.CircleMarker(location, popup = label, radius = 1, fill = True, fill_color = 'grey', color = 'grey').add_to(feature_group)

    m.add_child(feature_group)

    folium.LayerControl().add_to(m)

    m.save('C:/Workspace/TNC-Demand-Model/Data Exploration/ACS Maps/' + str(year) + '/' + var + '_Map.html')


Working on variable TOTAL_POP
19889.0
0.0
3960.363153904473
[    0.  3836.  5188.  7832. 19889.]




Working on variable TOTAL_MALE
9249.0
0.0
1921.6376042456407
[   0.   1863.   2502.5  3900.88 9249.  ]




Working on variable TOTAL_FEMALE
10649.0
0.0
2038.7255496588325
[    0.    1986.    2666.    3988.48 10649.  ]




Working on variable AGE_5U
1041.0
0.0
248.55496588324488
[   0.    224.    330.    633.64 1041.  ]




Working on variable AGE_5_9
1074.0
0.0
240.48749052312357
[   0.    221.    331.    620.56 1074.  ]




Working on variable AGE+10_14
936.0
0.0
244.34723275208492
[  0.  220.  342.5 635.  936. ]




Working on variable AGE_15_19
2644.0
0.0
241.20697498104624
[   0.    208.    334.5   682.84 2644.  ]




Working on variable AGE_20_24
2632.0
0.0
266.2600454890068
[   0.    225.    355.5   750.76 2632.  ]




Working on variable AGE_25_34
6351.0
0.0
646.9438968915846
[   0.    553.    821.5  1864.04 6351.  ]




Working on variable AGE_35_44
3786.0
0.0
534.2494313874147
[   0.    497.    692.5  1204.12 3786.  ]




Working on variable AGE_45_54
2788.0
0.0
507.0940106141016
[   0.    478.    683.   1148.84 2788.  ]




Working on variable AGE_55_59
1243.0
0.0
253.10917361637604
[   0.    230.    343.    634.92 1243.  ]




Working on variable AGE_60_64
965.0
0.0
228.65579984836998
[  0.   204.   312.5  610.64 965.  ]




Working on variable AGE_65_74
1239.0
0.0
310.8127369219105
[   0.    281.    416.5   827.92 1239.  ]




Working on variable AGE_75_84
960.0
0.0
165.10007581501137
[  0.   136.   230.   559.64 960.  ]




Working on variable AGE_85P
846.0
0.0
73.54131918119788
[  0.    50.    99.   306.28 846.  ]




Working on variable MEDIAN_AGE
66.8
15.9
37.225323193916296
[15.9   36.5   41.6   51.316 66.8  ]




Working on variable TOTAL_HHLDS
11517.0
0.0
1488.3017437452615
[    0.    1415.    1874.    3181.84 11517.  ]




Working on variable AVG_HHLD_SIZE
4.5
1.34
2.694342205323195
[1.34   2.69   3.02   3.8672 4.5   ]




Working on variable AVG_FAMILY_SIZE
5.18
2.16
3.4221901140684405
[2.16  3.38  3.735 4.42  5.18 ]




Working on variable TOTAL_POP_18_24
4465.0
0.0
361.08036391205457
[   0.   301.   466.   974.2 4465. ]




Working on variable TOTAL_POP_18_24_NO_HIGH_SCHOOL
1958.0
0.0
46.076573161485975
[   0.     32.     65.5   178.64 1958.  ]




Working on variable TOTAL_POP_18_24_HIGH_SCHOOL
1249.0
0.0
97.5822592873389
[   0.     74.    134.    323.28 1249.  ]




Working on variable TOTAL_POP_18_24_SOME_COLLEGE
2971.0
0.0
156.3335860500379
[   0.    121.    196.    498.64 2971.  ]




Working on variable TOTAL_POP_18_24_SOME_BACHELORS_HIGHER
766.0
0.0
61.08794541319181
[  0.   35.   73.  339.2 766. ]




Working on variable TOTAL_POP_25P
16430.0
0.0
2719.5064442759667
[    0.    2631.    3531.    5546.48 16430.  ]




Working on variable TOTAL_POP_25P_LESS_THAN_9TH
1903.0
0.0
183.39802880970433
[   0.    100.    233.5   837.04 1903.  ]




Working on variable TOTAL_POP_25P_9TH_TO_12TH
2469.0
0.0
178.5625473843821
[   0.    154.    254.    509.92 2469.  ]




Working on variable TOTAL_POP_25P_HIGH_SCHOOL
2412.0
0.0
635.0030326004548
[   0.    576.    918.5  1593.04 2412.  ]




Working on variable TOTAL_POP_25P_SOME_COLLEGE
2204.0
0.0
512.131159969674
[   0.    467.    702.   1256.28 2204.  ]




Working on variable TOTAL_POP_25P_ASSOCIATES
922.0
0.0
177.83093252463988
[  0.   152.   251.   520.28 922.  ]




Working on variable TOTAL_POP_25P_BACHELORS
5397.0
0.0
615.0
[   0.   486.   885.5 1864.6 5397. ]




Working on variable TOTAL_POP_25P_GRADUATE
6964.0
0.0
417.5807429871114
[   0.    269.    572.   1532.56 6964.  ]




Working on variable TOTAL_POP_25P_HIGH_SCHOOL_HIGHER
16015.0
0.0
2357.54586808188
[    0.    2261.    3065.5   5132.56 16015.  ]




Working on variable TOTAL_POP_25P_BACHELORS_HIGHER
12361.0
0.0
1032.5807429871115
[    0.     771.    1494.    3288.72 12361.  ]




Working on variable TOTAL_HHLDS_LESS_10K
50.4
0.0
8.567604562737639
[ 0.     6.1   11.6   29.444 50.4  ]




Working on variable TOTAL_HHLDS_10K_15K
26.7
0.0
4.79749049429657
[ 0.   3.7  6.6 16.6 26.7]




Working on variable TOTAL_HHLDS_15K_25K
41.0
0.0
10.384258555133094
[ 0.   9.2 13.6 27.3 41. ]




Working on variable TOTAL_HHLDS_25K_35K
29.8
0.0
9.085703422053225
[ 0.     8.7   12.2   19.672 29.8  ]




Working on variable TOTAL_HHLDS_35K_50K
34.8
0.0
11.862813688212919
[ 0.    11.4   15.3   23.944 34.8  ]




Working on variable TOTAL_HHLDS_50K_75K
39.8
0.0
15.977566539923957
[ 0.    15.8   19.6   27.844 39.8  ]




Working on variable TOTAL_HHLDS_75K_100K
28.8
0.0
11.454904942965785
[ 0.  11.4 14.4 21.7 28.8]




Working on variable TOTAL_HHLDS_100K_150K
39.3
0.0
13.726311787072262
[ 0.    13.6   18.7   26.972 39.3  ]




Working on variable TOTAL_HHLDS_150K_200K
20.8
0.0
6.335665399239539
[ 0.     5.4    9.5   18.072 20.8  ]




Working on variable TOTAL_HHLDS_200KP
62.7
0.0
7.80950570342206
[ 0.     3.8   10.4   37.888 62.7  ]




Working on variable HHLDS_MEDIAN_INCOME
250000.0
9787.0
64898.03269961977
[  9787.    57557.    82788.   156918.92 250000.  ]




Working on variable HHLDS_MEAN_INCOME
250000.0
9787.0
64898.03269961977
[  9787.    57557.    82788.   156918.92 250000.  ]




Working on variable TOTAL_HHLDS_DP04
11517.0
0.0
1488.3017437452615
[    0.    1415.    1874.    3181.84 11517.  ]




Working on variable HHLDS_0_VEH
3473.0
0.0
263.434420015163
[   0.   174.   306.5 1252.2 3473. ]




Working on variable HHLDS_1_VEH
7860.0
0.0
602.5276724791509
[   0.  527.  753. 1497. 7860.]




Working on variable HHLDS_2_VEH
2343.0
0.0
448.0257771038666
[   0.    395.    635.5  1176.64 2343.  ]




Working on variable HHLDS_3P_VEH
1093.0
0.0
174.31387414708112
[   0.   129.   271.   623.4 1093. ]




Working on variable AVG_VEH_PER_HHLD
2.207279457125231
0.0
1.3232548838238232
[0.         1.34846827 1.66458299 2.01843245 2.20727946]




Working on variable SQUARE_MILES_LAND
29.627703379962
0.0
0.716398644098684
[ 0.          0.33581646  0.80438734  4.2517939  29.62770338]




Working on variable TOTAL_POP_PER_SQ_MILE
263230.23826606694
0.0
13737.655051312018
[     0.          10041.45938197  18909.99744089  49833.15979039
 263230.23826607]




Working on variable TOTAL_HHLDS_PER_SQ_MILE
167031.97623543945
0.0
5618.212362906866
[     0.           3557.09315548   6860.8401701   27663.91081785
 167031.97623544]




Working on variable AGE_25_34_PER_POP
0.5623091867887872
0.02389973768580589
0.16740279543053196
[0.02389974 0.14510365 0.18980073 0.42395202 0.56230919]




Working on variable TOTAL_POP_25P_BACHELORS_HIGHER_PER_POP
0.8245695672405771
0.0026542800265428003
0.25692631386517
[0.00265428 0.20958084 0.38080624 0.66920255 0.82456957]




Working on variable TOTAL_HHLDS_200KP_PER_POP
0.049079089924160345
0.0
0.002411738751215417
[0.         0.00095873 0.00286356 0.01446401 0.04907909]




Working on variable HHLDS_0_VEH_PER_HHLD
0.7781818181818182
0.0
0.19016950444634645
[0.         0.14546685 0.28091472 0.55177689 0.77818182]




Working on variable HHLDS_100KP_PER_HHLD
0.43412698412698414
0.0
0.022358373575894687
[0.         0.01660926 0.02767054 0.08305556 0.43412698]


