In [1]:
!pip install us



In [2]:
#Dependencies
from census import Census
from config_census import (census_key, gkey)
import gmaps
import numpy as np
import pandas as pd
import requests
import time
from us import states
from scipy.stats import linregress
from matplotlib import pyplot as plt

# Census API Key
c = Census(census_key, year=2018)

In [3]:
c

<census.core.Census at 0x242a25b7148>

In [4]:
# Run Census Search to retrieve data on all zip codes (2013 ACS5 Census)
# See: https://github.com/CommerceDataService/census-wrapper for library documentation
# See: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b for labels
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                          "B19301_001E",
                          "B17001_002E"), {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

census_pd.head()

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty Count,Zipcode
0,ZCTA5 00601,13092.0,17242.0,40.5,6999.0,10772.0,601
1,ZCTA5 00602,16358.0,38442.0,42.3,9277.0,19611.0,602
2,ZCTA5 00603,16603.0,48814.0,41.1,11307.0,24337.0,603
3,ZCTA5 00606,12832.0,6437.0,43.3,5943.0,4163.0,606
4,ZCTA5 00610,19309.0,27073.0,42.1,10220.0,11724.0,610


In [5]:
census_pd.count()

Name                 33120
Household Income     33085
Population           33120
Median Age           33120
Per Capita Income    32776
Poverty Count        33085
Zipcode              33120
dtype: int64

In [6]:
census_pd.dropna(how='any')

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty Count,Zipcode
0,ZCTA5 00601,13092.0,17242.0,40.5,6999.0,10772.0,00601
1,ZCTA5 00602,16358.0,38442.0,42.3,9277.0,19611.0,00602
2,ZCTA5 00603,16603.0,48814.0,41.1,11307.0,24337.0,00603
3,ZCTA5 00606,12832.0,6437.0,43.3,5943.0,4163.0,00606
4,ZCTA5 00610,19309.0,27073.0,42.1,10220.0,11724.0,00610
...,...,...,...,...,...,...,...
33079,ZCTA5 99922,34028.0,330.0,39.5,18213.0,129.0,99922
33081,ZCTA5 99925,57375.0,927.0,43.6,25840.0,172.0,99925
33082,ZCTA5 99926,53409.0,1635.0,34.5,22453.0,235.0,99926
33083,ZCTA5 99927,-666666666.0,38.0,55.5,13658.0,28.0,99927


In [7]:
# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * census_pd["Poverty Count"].astype(float) / census_pd["Population"].astype(float)
census_pd.head()

Unnamed: 0,Name,Household Income,Population,Median Age,Per Capita Income,Poverty Count,Zipcode,Poverty Rate
0,ZCTA5 00601,13092.0,17242.0,40.5,6999.0,10772.0,601,62.475351
1,ZCTA5 00602,16358.0,38442.0,42.3,9277.0,19611.0,602,51.014515
2,ZCTA5 00603,16603.0,48814.0,41.1,11307.0,24337.0,603,49.856599
3,ZCTA5 00606,12832.0,6437.0,43.3,5943.0,4163.0,606,64.672984
4,ZCTA5 00610,19309.0,27073.0,42.1,10220.0,11724.0,610,43.305138


In [8]:
# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate"]]

census_pd.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17242.0,40.5,13092.0,6999.0,10772.0,62.475351
1,602,38442.0,42.3,16358.0,9277.0,19611.0,51.014515
2,603,48814.0,41.1,16603.0,11307.0,24337.0,49.856599
3,606,6437.0,43.3,12832.0,5943.0,4163.0,64.672984
4,610,27073.0,42.1,19309.0,10220.0,11724.0,43.305138


In [9]:
# Visualize
print(len(census_pd))

33120


In [10]:
zip_codes=[90001, 90002, 90005, 90006, 90003, 90004, 90007, 90008, 90010, 90013, 90014, 90017, 90021, 90009, 90011, 90012, 90015, 90016,
90018, 90019, 90020, 90022, 90025, 90027, 90031, 90032, 90023, 90024, 90026, 90028, 90029, 90030, 90033, 90034, 90037, 90039, 90042, 90043, 90045, 90035, 90036, 90038, 90040, 90041, 90044, 90046, 90048, 90052, 90053, 90054, 90057, 90058, 90047, 90049, 90050, 90051, 90055, 90056, 90059, 90062, 90063, 90064, 90065, 90066, 90070, 90060, 90061, 90067, 90068, 90071, 90072, 90074, 90078, 90079, 90084, 90086, 90087, 90095, 90096, 90073, 90075, 90076, 90077, 90080, 90081, 90082, 90083, 90088,
90089, 90091, 90093, 90099, 90189, 90101, 90103]

zip_codes_df=pd.DataFrame({"Zipcode": zip_codes})
zip_codes_df.head()

Unnamed: 0,Zipcode
0,90001
1,90002
2,90005
3,90006
4,90003


In [11]:
zip_codes_df.count

<bound method DataFrame.count of     Zipcode
0     90001
1     90002
2     90005
3     90006
4     90003
..      ...
90    90093
91    90099
92    90189
93    90101
94    90103

[95 rows x 1 columns]>

In [12]:
census_pd.dtypes

Zipcode               object
Population           float64
Median Age           float64
Household Income     float64
Per Capita Income    float64
Poverty Count        float64
Poverty Rate         float64
dtype: object

In [22]:
census_pd['Zipcode'].astype(str)

0        00601
1        00602
2        00603
3        00606
4        00610
         ...  
33115    87515
33116    87518
33117    87511
33118    87578
33119    87532
Name: Zipcode, Length: 33120, dtype: object

In [23]:
zip_codes_df.dtypes

Zipcode    int32
dtype: object

In [24]:
zip_codes_df['Zipcode']=zip_codes_df['Zipcode'].astype(str)

In [25]:
zip_codes_df.dtypes

Zipcode    object
dtype: object

In [27]:
la_df=pd.merge(zip_codes_df, census_pd, on='Zipcode', how='inner')
la_df.head()

Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,90001,58975.0,29.1,38521.0,12835.0,16911.0,28.674862
1,90002,53111.0,27.7,35410.0,12372.0,17365.0,32.695675
2,90005,39479.0,35.8,35149.0,22694.0,11036.0,27.954102
3,90006,61698.0,33.2,35411.0,16383.0,17326.0,28.081948
4,90003,72741.0,28.6,37226.0,12316.0,22186.0,30.499993


In [29]:
la_df.count()

Zipcode              65
Population           65
Median Age           65
Household Income     65
Per Capita Income    65
Poverty Count        65
Poverty Rate         62
dtype: int64