In [101]:
import pandas as pd
from census import Census
from us import states
from shapely.geometry import Point
import geopandas as gp
from scipy.stats import ttest_ind
import rtree
# Widen pandas' display limits so the wide merged frames below are shown in
# full rather than being elided with "...".
pd.options.display.max_columns = 500
pd.options.display.max_rows = 250

In [48]:
# Load the per-ZIP table. ZCTA5CE10 is forced to str so the codes are kept as
# text and can be joined against the other ZIP tables on that column.
zcta_as_text = {'ZCTA5CE10': str}
df = pd.read_csv('../data/prizip.csv', dtype=zcta_as_text)
df.head()

Unnamed: 0,OBJECTID,ZCTA5CE10,Zip,PopOver5,EngOnly,Spanish,Indo_Euro,Asian,Other,PovPop,Under50,Under100,Under130,Under200,Ineligible,Adj_Denom,CalFresh,PRI_Code,Elig_Non_R
0,392,90747,90747,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,No Data,0.0
1,362,91046,91046,125,44,13,39,29,0,125,0,13,27.04,82,29.476,-2.436,13,Below Zero (Data Error),0.0
2,657,92132,92132,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,No Data,0.0
3,403,91210,91210,813,609,102,90,6,6,886,125,197,204.88,240,0.0,0.0,0,No Data,0.0
4,348,90831,90831,0,0,0,0,0,0,0,0,0,0.0,0,0.0,0.0,0,No Data,0.0


In [50]:
# Bay Area ZIP list: read ZIP as text, drop the geometry/measurement columns,
# and rename ZIP -> ZCTA5CE10 so it shares a join key with the other tables.
# NOTE: index=str also maps every row label through str().
bayarea_zipcodes = (
    pd.read_csv('../other_data/bayarea_zipcodes.csv', dtype={'ZIP': str})
    .drop(columns=["the_geom", "Area__", "Length__"])
    .rename(index=str, columns={"ZIP": "ZCTA5CE10"})
)
bayarea_zipcodes.shape

(223, 3)

In [51]:
# Read the ZCTA boundary file into a GeoDataFrame (one row per ZCTA, with a
# geometry column).
zcta_boundaries_path = "../other_data/cb_2016_us_zcta510_500k.json"
all_zip = gp.read_file(zcta_boundaries_path)
all_zip.head()

Unnamed: 0,ZCTA5CE10,AFFGEOID10,GEOID10,ALAND10,AWATER10,geometry
0,21914,8600000US21914,21914,1924479,477175,"POLYGON ((-75.993225 39.575393, -75.970844 39...."
1,1001,8600000US01001,1001,29731610,2118827,"POLYGON ((-72.63940199999999 42.098883, -72.63..."
2,34736,8600000US34736,34736,322808220,78588518,"(POLYGON ((-81.806163 28.568643, -81.796168999..."
3,46151,8600000US46151,46151,530632048,9804480,"(POLYGON ((-86.390692 39.339599, -86.381395 39..."
4,48039,8600000US48039,48039,59592687,4845242,"POLYGON ((-82.61060000000001 42.724669, -82.48..."


In [95]:
# Inner join (pandas' default): keep only rows whose ZCTA5CE10 appears in both
# the prizip table and the Bay Area ZIP list.
df_zip = pd.merge(df, bayarea_zipcodes, on='ZCTA5CE10')

# Spot-check a single ZIP; an empty result means it did not survive the join.
is_94035 = df_zip['ZCTA5CE10'].eq('94035')
df_zip[is_94035]


Unnamed: 0,OBJECTID,ZCTA5CE10,Zip,PopOver5,EngOnly,Spanish,Indo_Euro,Asian,Other,PovPop,Under50,Under100,Under130,Under200,Ineligible,Adj_Denom,CalFresh,PRI_Code,Elig_Non_R,PO_NAME,STATE


In [96]:
# Attach the ZCTA polygons to the Bay Area rows. A right join keeps every row
# of df_zip, even when all_zip has no matching ZCTA (those rows get null
# geometry columns).
merged = all_zip.merge(
    df_zip,
    on='ZCTA5CE10',
    how='right',
)
merged.shape

(222, 26)

In [102]:
# CalFresh count relative to PopOver5 (presumably the population aged 5 and
# over -- confirm against the data dictionary). Despite the column name, this
# is a fraction, not a 0-100 percentage.
#
# Some ZIPs report PopOver5 == 0. Dividing by zero would give NaN (0/0) or
# +/-inf (n/0), and an inf would sort to the very top of the ranking. Mask
# non-positive denominators to NaN so those rows yield NaN, which sort_values
# places last.
population_over5 = merged['PopOver5'].where(merged['PopOver5'] > 0)
merged['percent_calfresh'] = merged['CalFresh'] / population_over5

# Show only the highest-ranked ZIPs instead of dumping the whole frame.
# NOTE(review): ratios above 1 are not meaningful; in the saved output the top
# row is tagged "Over 100 (Data Error)" in PRI_Code -- consider filtering such
# rows before ranking.
merged.sort_values(by=['percent_calfresh'], ascending=False).head(10)

Unnamed: 0,ZCTA5CE10,AFFGEOID10,GEOID10,ALAND10,AWATER10,geometry,OBJECTID,Zip,PopOver5,EngOnly,Spanish,Indo_Euro,Asian,Other,PovPop,Under50,Under100,Under130,Under200,Ineligible,Adj_Denom,CalFresh,PRI_Code,Elig_Non_R,PO_NAME,STATE,percent_calfresh
41,95113,8600000US95113,95113,880603,0,"POLYGON ((-121.893293 37.339833, -121.884891 3...",827,95113,1271,808,50,76,267,70,1332,161,377,457.6,494,279.57898,178.02102,2131,Over 100 (Data Error),0.0,San Jose,CA,1.676633
5,95013,8600000US95013,95013,3901007,0,"POLYGON ((-121.751149 37.226434, -121.747478 3...",756,95013,66,43,23,0,0,0,66,11,11,11.44,11,11.44992,-0.00992,45,Below Zero (Data Error),0.0,Coyote,CA,0.681818
21,94621,8600000US94621,94621,20145852,4590939,"POLYGON ((-122.248619837374 37.7275859991074, ...",261,94621,29903,11441,16884,216,841,521,33210,4934,11498,14697.28,22312,3891.73262,10805.54738,8577,50.0 - 79.9,2228.54738,OAKLAND,CA,0.286827
23,94612,8600000US94612,94612,2215894,0,"POLYGON ((-122.274296 37.817431, -122.273752 3...",257,94612,14818,9111,1323,540,3270,574,15007,1539,4283,5516.16,7671,1310.802,4205.358,4086,80.0 - 100.0,119.358,OAKLAND,CA,0.275746
110,94590,8600000US94590,94590,15388413,2497524,"POLYGON ((-122.268872 38.126181, -122.255875 3...",198,94590,34371,23063,7336,538,3322,112,36337,4215,9524,11704.16,17490,2247.46914,9456.69086,8385,80.0 - 100.0,1071.69086,VALLEJO,CA,0.243956
205,94603,8600000US94603,94603,8447500,0,"POLYGON ((-122.165823 37.753588, -122.161409 3...",249,94603,31005,13527,14823,119,1778,758,33663,2599,7796,10683.92,16909,3589.69684,7094.22316,7131,Over 100 (Data Error),0.0,OAKLAND,CA,0.229995
152,94124,8600000US94124,94124,12759665,2806581,"POLYGON ((-122.403784 37.749433, -122.37936078...",5,94124,32904,15014,6159,621,10844,266,35245,2755,7194,10312.64,14664,3753.39574,6559.24426,7520,Over 100 (Data Error),0.0,SAN FRANCISCO,CA,0.228544
91,94605,8600000US94605,94605,23195143,139200,"POLYGON ((-122.197348 37.771623, -122.194594 3...",250,94605,40242,29258,8082,743,1795,364,42856,3874,8608,10708.88,16097,2861.78214,7847.09786,8885,Over 100 (Data Error),0.0,OAKLAND,CA,0.220789
176,94801,8600000US94801,94801,29210712,16462894,"POLYGON ((-122.364399563109 38.0125314878771, ...",272,94801,27533,9685,14809,443,2178,418,29686,2791,7343,10523.76,15390,3035.5175,7488.2425,5969,50.0 - 79.9,1519.2425,RICHMOND,CA,0.216794
114,94601,8600000US94601,94601,8412005,310715,"POLYGON ((-122.223531 37.795772, -122.20984 37...",206,94601,46785,16463,21120,543,7587,1072,49806,5742,13674,18134.48,26692,5371.13354,12763.34646,9903,50.0 - 79.9,2860.34646,OAKLAND,CA,0.21167


In [97]:
# Serialise the merged frame with its to_json() method and save the resulting
# text as the snapshot file.
# NOTE(review): in the saved run this cell executed before 'percent_calfresh'
# was added (In [97] vs In [102]); a top-to-bottom re-run will include that
# column in the snapshot.
with open('../data/zipsnap.json', 'w') as snapshot_file:
    snapshot_text = merged.to_json()
    snapshot_file.write(snapshot_text)