In [119]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
from us import states

# Census API Key
from config import census_key
c = Census(census_key, year=2016)

In [120]:
#B05007 - PLACE OF BIRTH BY YEAR OF ENTRY BY CITIZENSHIP STATUS FOR THE FOREIGN-BORN POPULATION - 2016 ACS 5-year estimates

# Run Census Search to retrieve data on all zip codes (2016 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels

census_data = c.acs5.get(("NAME", 
                          # Summary wealth stats (May drop later)
                          "B01003_001E", 'B01002_001E', 'B19013_001E', 'B19301_001E', 'B17001_002E',
                          # Total Foreign Born
                          "B05007_001E",
                          #Entered 2010 or Later
                          "B05007_002E", "B05007_003E","B05007_004E",
                          #Entered 2000 to 2009
                           "B05007_005E", "B05007_006E", "B05007_007E",
                          #Entered 1990 to 1999
                          "B05007_008E", "B05007_009E", "B05007_010E",
                          #Earlier than 1990
                         "B05007_011E", "B05007_012E","B05007_013E"), 
                         geo={'for': 'county:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)
census_pd.head(1)

Unnamed: 0,B01002_001E,B01003_001E,B05007_001E,B05007_002E,B05007_003E,B05007_004E,B05007_005E,B05007_006E,B05007_007E,B05007_008E,...,B05007_010E,B05007_011E,B05007_012E,B05007_013E,B17001_002E,B19013_001E,B19301_001E,NAME,county,state
0,37.8,55049.0,1012.0,146.0,3.0,143.0,297.0,61.0,236.0,204.0,...,95.0,365.0,302.0,63.0,6697.0,53099.0,26168.0,"Autauga County, Alabama",1,1


In [121]:
# Column renaming

# Natu = Naturalized (Became a citizen)
# NCit = Not a Citizen
# Total = Natu + NCit
# Total_ForeignBorn = total from any year

census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B05007_001E": "Total_ForeignBorn",
                                      "B05007_002E": "Total_2010",
                                      "B05007_003E": "Natu_2010",
                                      "B05007_004E": "NCit_2010",
                                      "B05007_005E": "Total_2000",
                                      "B05007_006E": "Natu_2000",
                                      "B05007_007E": "NCit_2000",
                                      "B05007_008E": "Total_1990",
                                      "B05007_009E": "Natu_1990",
                                      "B05007_010E": "NCit_1990",
                                      "B05007_011E": "Total_Early90",
                                      "B05007_012E": "Natu_Early90",
                                      "B05007_013E": "NCit_Early90",
                                      "NAME": "combined_name",
                                      "county": "county_fips",
                                      "state": "state_fips"
                                     })
# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Add in Combined FIPS
census_pd['combined_fips'] = census_pd['state_fips'].astype(str) + census_pd['county_fips']

# Separate State and County into Columns
census_pd['state_name'], census_pd['county_name'] = census_pd['combined_name'].str.split(',', 1).str

#Reorder
cols = census_pd.columns.tolist()
cols.insert(0, cols.pop(cols.index('combined_fips')))
cols.insert(1, cols.pop(cols.index('county_name')))
cols.insert(2, cols.pop(cols.index('state_name')))
census_pd = census_pd.reindex(columns= cols)

# Visualize
print(len(census_pd))
census_pd.head(3)

3220


Unnamed: 0,combined_fips,county_name,state_name,Median Age,Population,Total_ForeignBorn,Total_2010,Natu_2010,NCit_2010,Total_2000,...,Total_Early90,Natu_Early90,NCit_Early90,Poverty Count,Household Income,Per Capita Income,combined_name,county_fips,state_fips,Poverty Rate
0,1001,Alabama,Autauga County,37.8,55049.0,1012.0,146.0,3.0,143.0,297.0,...,365.0,302.0,63.0,6697.0,53099.0,26168.0,"Autauga County, Alabama",1,1,12.165525
1,1003,Alabama,Baldwin County,42.3,199510.0,6523.0,1196.0,39.0,1157.0,2097.0,...,1905.0,1476.0,429.0,25551.0,51365.0,28069.0,"Baldwin County, Alabama",3,1,12.806877
2,1005,Alabama,Barbour County,38.7,26614.0,761.0,68.0,0.0,68.0,479.0,...,73.0,56.0,17.0,6235.0,33956.0,17249.0,"Barbour County, Alabama",5,1,23.427519


In [122]:
census_pd.to_csv('census_immig.csv')