In [20]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

# Census API Key
# The key is imported from a separate `config` module instead of being
# written directly into the notebook.
from config import api_key
# Census client used by the query cells in this notebook (c.acs5.get(...));
# it is constructed with year=2017.
c = Census(api_key, year=2017)

In [None]:
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

In [116]:
# Create the list of Chicago zipcodes.
# The sequence is every number from 60601 through 60661 except the five
# numbers this notebook treats as invalid zipcodes, which leaves 56 codes.
CHI_ZIP_FIRST = 60601
CHI_ZIP_LAST = 60661
CHI_ZIP_INVALID = {60627, 60635, 60648, 60650, 60658}

# Zipcodes are stored as strings (str(...)), the form the later merges on
# "Zipcode" join against.
chi_zip = [
    str(zipcode)
    for zipcode in range(CHI_ZIP_FIRST, CHI_ZIP_LAST + 1)
    if zipcode not in CHI_ZIP_INVALID
]

# Guard against an accidental edit of the range or the invalid set
assert len(chi_zip) == 56, "expected 56 Chicago zipcodes"

# Create Chicago zipcode DataFrame with a single "Zipcode" column
df_chi_zip = pd.DataFrame({"Zipcode": chi_zip})

df_chi_zip.head()

Unnamed: 0,Zipcode
0,60601
1,60602
2,60603
3,60604
4,60605


In [78]:
# Run Census Search to retrieve general data on all zip codes (2017 ACS5 Census)

# ACS variable code -> readable column label
general_labels = {
    "B19013_001E": "Median Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
}

census_data = c.acs5.get(("NAME",) + tuple(general_labels),
                         {'for': 'zip code tabulation area:*'})

# Convert to DataFrame and rename the columns to readable labels
df_census_general = pd.DataFrame(census_data).rename(
    columns=dict(general_labels, **{"NAME": "Name",
                                    "zip code tabulation area": "Zipcode"}))

# Add in Poverty Rate (Poverty Count / Population), as a percentage.
# pd.to_numeric(errors="coerce") turns missing or non-numeric values into NaN
# instead of raising, and a zero population yields NaN rather than inf.
# NOTE(review): this divides by total population; confirm that is the intended
# denominator for the poverty count.
poverty_count = pd.to_numeric(df_census_general["Poverty Count"], errors="coerce")
population = pd.to_numeric(df_census_general["Population"], errors="coerce")
df_census_general["Poverty Rate"] = 100 * poverty_count / population.where(population > 0)

# Final DataFrame: keep only the analysis columns, in display order
df_census_general = df_census_general[["Zipcode", "Population", "Median Age", "Median Household Income",
                                       "Per Capita Income", "Poverty Count", "Poverty Rate"]]

# Visualize
print(len(df_census_general))
df_census_general.head()


33120


Unnamed: 0,Zipcode,Population,Median Age,Median Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17599.0,38.9,11757.0,7041.0,11282.0,64.105915
1,602,39209.0,40.9,16190.0,8978.0,20428.0,52.100283
2,603,50135.0,40.4,16645.0,10897.0,25176.0,50.216416
3,606,6304.0,42.8,13387.0,5960.0,4092.0,64.911168
4,610,27590.0,41.4,18741.0,9266.0,12553.0,45.498369


In [118]:
# Restrict the general census data to Chicago: a right join on "Zipcode" keeps
# one row for every zipcode in df_chi_zip.
df_chi_general = df_census_general.merge(df_chi_zip, how="right", on="Zipcode")

print(len(df_chi_general))
df_chi_general.head()

56


Unnamed: 0,Zipcode,Population,Median Age,Median Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,60601,13887.0,37.6,110215.0,95152.0,1171.0,8.432347
1,60602,1277.0,31.1,115774.0,77279.0,32.0,2.505873
2,60603,1197.0,26.7,140114.0,106207.0,261.0,21.804511
3,60604,668.0,40.6,114565.0,130966.0,127.0,19.011976
4,60605,26188.0,33.6,107811.0,81144.0,2312.0,8.828471


In [101]:
# Run Census Search to retrieve race data on all zip codes (2017 ACS5 Census)

# Variables requested from ACS table B02001, in request order
race_fields = ("NAME", "B02001_001E", "B02001_002E", "B02001_003E", "B02001_004E",
               "B02001_005E", "B02001_006E", "B02001_007E")

# Raw column name -> readable label
race_labels = {
    "B02001_001E": "Total",
    "B02001_002E": "White",
    "B02001_003E": "Black",
    "B02001_004E": "Native American",
    "B02001_005E": "Asian",
    "B02001_006E": "Pacific Islander",
    "B02001_007E": "Other",
    "NAME": "Name",
    "zip code tabulation area": "Zipcode",
}

census_data = c.acs5.get(race_fields, {'for': 'zip code tabulation area:*'})

# Convert to DataFrame and apply the readable labels
df_census_race = pd.DataFrame(census_data)
df_census_race = df_census_race.rename(columns=race_labels)

# Final DataFrame: keep only the analysis columns, in display order
race_columns = ["Zipcode", "Total", "White", "Black", "Native American", "Asian",
                "Pacific Islander", "Other"]
df_census_race = df_census_race[race_columns]

# Visualize
print(len(df_census_race))
df_census_race.head()


33120


Unnamed: 0,Zipcode,Total,White,Black,Native American,Asian,Pacific Islander,Other
0,84306,345.0,345.0,0.0,0.0,0.0,0.0,0.0
1,84775,421.0,419.0,0.0,0.0,0.0,0.0,0.0
2,84762,211.0,211.0,0.0,0.0,0.0,0.0,0.0
3,84772,203.0,203.0,0.0,0.0,0.0,0.0,0.0
4,84781,224.0,224.0,0.0,0.0,0.0,0.0,0.0


In [119]:
# Restrict the race data to Chicago: a right join on "Zipcode" keeps one row
# for every zipcode in df_chi_zip.
df_chi_race = df_census_race.merge(df_chi_zip, how="right", on="Zipcode")

print(len(df_chi_race))
df_chi_race.head()

56


Unnamed: 0,Zipcode,Total,White,Black,Native American,Asian,Pacific Islander,Other
0,60607,28928.0,17396.0,3681.0,30.0,6399.0,26.0,377.0
1,60609,60994.0,29002.0,14628.0,402.0,3755.0,12.0,11885.0
2,60625,80676.0,53120.0,3981.0,112.0,11272.0,41.0,8098.0
3,60646,27865.0,23611.0,246.0,6.0,2713.0,0.0,745.0
4,60652,43582.0,11459.0,20510.0,133.0,325.0,12.0,9717.0


In [None]:
# Run Census Search to retrieve marital status data on all zip codes (2017 ACS5 Census)
#
# This cell previously re-requested the income/poverty variables and then built
# its result from df_census_general, so it never contained marital status data.
# It now queries ACS table B12001 (marital status by sex, population 15 years
# and over).
# NOTE(review): confirm these variable codes against the 2017 ACS5 variable
# list before relying on the numbers.
marital_labels = {
    "B12001_001E": "Population 15+",
    "B12001_003E": "Male Never Married",
    "B12001_004E": "Male Now Married",
    "B12001_009E": "Male Widowed",
    "B12001_010E": "Male Divorced",
    "B12001_012E": "Female Never Married",
    "B12001_013E": "Female Now Married",
    "B12001_018E": "Female Widowed",
    "B12001_019E": "Female Divorced",
}

census_data = c.acs5.get(("NAME",) + tuple(marital_labels),
                         {'for': 'zip code tabulation area:*'})

# Convert to DataFrame and rename the columns to readable labels
df_census_marital_status = pd.DataFrame(census_data).rename(
    columns=dict(marital_labels, **{"NAME": "Name",
                                    "zip code tabulation area": "Zipcode"}))

# Combine the male and female counts into one column per marital status.
# pd.to_numeric(errors="coerce") keeps missing values as NaN instead of raising.
marital_statuses = ["Never Married", "Now Married", "Widowed", "Divorced"]
for status in marital_statuses:
    df_census_marital_status[status] = (
        pd.to_numeric(df_census_marital_status["Male " + status], errors="coerce")
        + pd.to_numeric(df_census_marital_status["Female " + status], errors="coerce")
    )

# Final DataFrame: keep only the analysis columns, in display order
df_census_marital_status = df_census_marital_status[["Zipcode", "Population 15+"] + marital_statuses]

# Visualize
print(len(df_census_marital_status))
df_census_marital_status.head()


In [None]:
# merge (right outer join) census data with chicago zip code dataframe to get relevant marital status data set
# The result is stored in its own variable so it no longer overwrites
# df_chi_race, and it merges the marital status frame rather than the race frame.
df_chi_marital_status = pd.merge(df_census_marital_status, df_chi_zip, on='Zipcode', how='right')

print(len(df_chi_marital_status))
df_chi_marital_status.head()

In [3]:
# Build the combined Chicago data set before saving it.
# df_chi_census was not defined by any cell in this notebook, so the export
# failed on a fresh kernel. Here it is assembled from the general and race
# frames, one row per zipcode.
# NOTE(review): confirm which frames the export should contain; further
# Chicago frames can be merged in on "Zipcode" the same way.
df_chi_census = pd.merge(df_chi_general, df_chi_race, on="Zipcode", how="left")

# Save as a csv
# Note to avoid any issues later, use encoding="utf-8"
df_chi_census.to_csv("census_data.csv", encoding="utf-8", index=False)