In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
from config import api_key
c = Census(api_key, year=2015)

In [2]:
eco_data_2015 = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E", "B23025_003E","B23025_005E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
eco_2015 = pd.DataFrame(eco_data_2015)

# Column Reordering
eco_2015 = eco_2015.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_003E": "Employable Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population)
eco_2015["Poverty Rate"] = 100 * \
    eco_2015["Poverty Count"].astype(
        int) / eco_2015["Population"].astype(int)
# Add in Civilian Unemployment rate
eco_2015["Unemployment Rate"] = 100 * \
    eco_2015["Unemployment Count"].astype(
        int) / eco_2015["Employable Count"].astype(int)

# Final DataFrame
eco_2015 = eco_2015[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate","Employable Count","Unemployment Count", "Unemployment Rate"]]

# Visualize
print(len(eco_2015))
eco_2015.head()


33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Employable Count,Unemployment Count,Unemployment Rate
0,601,17982.0,37.6,10816.0,7453.0,10861.0,60.399288,6162.0,2213.0,35.913664
1,602,40260.0,39.5,16079.0,8474.0,21720.0,53.949329,15262.0,3807.0,24.944306
2,603,52408.0,39.9,16804.0,10179.0,25459.0,48.578461,17015.0,3991.0,23.455774
3,606,6331.0,40.8,12512.0,5863.0,4070.0,64.286843,1679.0,179.0,10.661108
4,610,28328.0,40.2,17475.0,8452.0,14005.0,49.438718,9542.0,1495.0,15.667575


In [3]:
eco_2015.to_csv("Census_2015/census_economic_2015.csv", encoding="utf-8", index=False)

In [4]:
population_data= c.acs5.get(("NAME", "B19013_001E",
                               "B02001-002E",
                               "B02001_003E",
                               "B03001_003E",
                               "B02001-005E",
                               "B02001_004E",
                               "B02001_008E"), {'for': 'zip code tabulation area:*'})

population_pd= pd.DataFrame(population_data)

population_pd= population_pd.rename(columns={"B19013_001E": "Population",
                                             "B02001-002E": "White(%)",
                                             "B02001_003E": "African American(%)", "B03001_003E": "Hispanic(%)", 
                                             "B02001-005E" : "Asian(%)", "B02001_004E": "American Indian(%)",
                                             "B02001_008E": "Two Or More Races(%)",  "NAME": "Name",
                                             "zip code tabulation area": "Zipcode" 
                                            })
#population_pd= population_pd[["Zipcode","Population","White(%)","African American(%)","Hispanic(%)","Asian(%)","American Indian(%)"]]

print(len(population_pd))
population_pd.head()


0


In [5]:
#population_pd.to_csv("census_population_2015.csv",encoding="utf-8",index=False)

In [6]:
edu_data_2015= c.acs5.get(("NAME","B01003_001E", "B15003_002E",
                               "B15003_017E",
                               "B15003_018E",
                               "B15003_021E",
                               "B15003_022E",
                               "B15003_023E",
                               "B15003_024E",
                           "B15003_025E"), {'for': 'zip code tabulation area:*'})

edu_2015= pd.DataFrame(edu_data_2015)

edu_2015= edu_2015.rename(columns={"B01003_001E": "Population","B15003_002E": "None",
                               "B15003_017E": "High School Degree",
                               "B15003_018E": "GED",
                               "B15003_021E": "Associate Degree",
                               "B15003_022E": "Bachelor Degree",
                               "B15003_023E":"Masters Degree",
                               "B15003_024E": "Professional Degree",
                           "B15003_025E":"Doctorate Degree", "NAME": "name","zip code tabulation area": "Zipcode"})

edu_2015= edu_2015[["Zipcode","None","High School Degree","GED","Associate Degree","Bachelor Degree","Masters Degree","Professional Degree","Doctorate Degree"]]

edu_2015.head()

Unnamed: 0,Zipcode,None,High School Degree,GED,Associate Degree,Bachelor Degree,Masters Degree,Professional Degree,Doctorate Degree
0,601,456.0,3017.0,156.0,615.0,1720.0,248.0,59.0,65.0
1,602,955.0,4770.0,1004.0,3499.0,4203.0,1242.0,206.0,258.0
2,603,1014.0,8309.0,1381.0,3147.0,5981.0,1710.0,283.0,280.0
3,606,188.0,1441.0,74.0,155.0,385.0,21.0,17.0,32.0
4,610,377.0,5707.0,803.0,1598.0,2266.0,453.0,38.0,144.0


In [7]:
edu_2015.to_csv("Census_2015/census_education_2015.csv",encoding="utf-8",index=False)

In [8]:
from config import api_key
c = Census(api_key, year=2016)

In [9]:
eco_data_2016 = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E", "B23025_003E","B23025_005E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
eco_2016 = pd.DataFrame(eco_data_2016)

# Column Reordering
eco_2016 = eco_2016.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B23025_003E": "Employable Count",
                                      "B23025_005E": "Unemployment Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population)
eco_2016["Poverty Rate"] = 100 * \
    eco_2016["Poverty Count"].astype(
        int) / eco_2016["Population"].astype(int)
# Add in Civilian Unemployment rate
eco_2016["Unemployment Rate"] = 100 * \
    eco_2016["Unemployment Count"].astype(
        int) / eco_2016["Employable Count"].astype(int)

# Final DataFrame
eco_2016 = eco_2016[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate","Employable Count","Unemployment Count", "Unemployment Rate"]]

# Visualize
eco_2016.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Employable Count,Unemployment Count,Unemployment Rate
0,1001,17423.0,45.0,56714.0,30430.0,1462.0,8.391207,9194.0,479.0,5.20992
1,1002,29970.0,23.2,48923.0,26072.0,8351.0,27.864531,16451.0,1271.0,7.725974
2,1003,11296.0,19.9,2499.0,3829.0,54.0,0.478045,5279.0,1021.0,19.340784
3,1005,5228.0,44.1,70568.0,32169.0,230.0,4.399388,2988.0,153.0,5.120482
4,1007,14888.0,42.5,80502.0,36359.0,1410.0,9.470715,8593.0,543.0,6.319097


In [10]:
eco_2016.to_csv("Census_2016/census_economic_2016.csv", encoding="utf-8", index=False)

In [11]:
edu_data_2016= c.acs5.get(("NAME","B01003_001E", "B15003_002E",
                               "B15003_017E",
                               "B15003_018E",
                               "B15003_021E",
                               "B15003_022E",
                               "B15003_023E",
                               "B15003_024E",
                           "B15003_025E"), {'for': 'zip code tabulation area:*'})

edu_2016= pd.DataFrame(edu_data_2016)

edu_2016= edu_2016.rename(columns={"B01003_001E": "Population","B15003_002E": "None",
                               "B15003_017E": "High School Degree",
                               "B15003_018E": "GED",
                               "B15003_021E": "Associate Degree",
                               "B15003_022E": "Bachelor Degree",
                               "B15003_023E":"Masters Degree",
                               "B15003_024E": "Professional Degree",
                           "B15003_025E":"Doctorate Degree", "NAME": "name","zip code tabulation area": "Zipcode"})

edu_2016= edu_2016[["Zipcode","None","High School Degree","GED","Associate Degree","Bachelor Degree","Masters Degree","Professional Degree","Doctorate Degree"]]

edu_2016.head()

Unnamed: 0,Zipcode,None,High School Degree,GED,Associate Degree,Bachelor Degree,Masters Degree,Professional Degree,Doctorate Degree
0,1001,69.0,3624.0,283.0,1342.0,2016.0,1353.0,121.0,116.0
1,1002,212.0,1540.0,138.0,694.0,3161.0,2956.0,592.0,2040.0
2,1003,0.0,0.0,7.0,0.0,3.0,10.0,1.0,1.0
3,1005,0.0,1292.0,120.0,321.0,505.0,327.0,92.0,76.0
4,1007,18.0,1946.0,305.0,978.0,2185.0,1455.0,228.0,387.0


In [12]:
edu_2016.to_csv("Census_2016/census_education_2016.csv", encoding="utf-8", index=False)