In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census

In [2]:
# Census API Key
# The key is imported from a local `api_keys` module instead of being written into the notebook.
from api_keys import census_key
# NOTE: despite its name, `api_keys` below is the Census client object (created with
# year=2017), not the `api_keys` module the key was imported from.
api_keys = Census(census_key, year=2017)

In [3]:
# Reference endpoint: https://api.census.gov/data/2018/acs/acs5/profile?get=group(DP02)&for=us:1
# NOTE(review): that URL addresses the 2018 profile tables, whereas the client used
# below was created with year=2017 -- confirm which one is intended as the reference.

# Run Census Search to retrieve data on all zip codes (2017 ACS5 Census)
# Variables to request; the trailing labels are the names they are given by the
# rename step later in the notebook.
acs5_fields = (
    "NAME",
    "B19013_001E",  # Household Income
    "B01003_001E",  # Population
    "B01002_001E",  # Median Age
    "B19301_001E",  # Per Capita Income
    "B17001_002E",  # Poverty Count
)
# Wildcard geography: every zip code tabulation area.
zcta_geography = {'for': 'zip code tabulation area:*'}

census_data = api_keys.acs5.get(acs5_fields, zcta_geography)

In [4]:
# Convert to DataFrame
# The rename step that follows expects the requested field codes (e.g. "B01003_001E")
# and "zip code tabulation area" to appear as column names of this frame.
census_pd = pd.DataFrame(census_data)

In [5]:
# Column renaming: replace the Census variable codes with readable labels.
# (Only the names change here; column order is untouched.)
readable_labels = {
    "NAME": "Name",
    "zip code tabulation area": "Zipcode",
    "B19013_001E": "Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
}
census_pd = census_pd.rename(columns=readable_labels)

In [6]:
# Add in Poverty Rate (Poverty Count / Population), expressed as a percentage.
# pd.to_numeric(errors="coerce") converts missing or non-numeric values to NaN,
# whereas .astype(int) raises as soon as one such value is present.
poverty_count = pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
population = pd.to_numeric(census_pd["Population"], errors="coerce")
# Rows with zero (or missing) population have no meaningful rate: mask the
# denominator so they come out as NaN instead of dividing by zero (NaN or inf).
census_pd["Poverty Rate"] = 100 * poverty_count / population.where(population > 0)

In [7]:
# Final DataFrame: keep only the columns listed below, in this order.
# Any column not listed (such as "Name") is dropped from the frame.
final_columns = [
    "Zipcode",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Poverty Rate",
]
census_pd = census_pd[final_columns]

In [8]:
# Visualize: print the number of rows, then show the first rows of the frame
print(census_pd.shape[0])
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,601,17599.0,38.9,11757.0,7041.0,11282.0,64.105915
1,602,39209.0,40.9,16190.0,8978.0,20428.0,52.100283
2,603,50135.0,40.4,16645.0,10897.0,25176.0,50.216416
3,606,6304.0,42.8,13387.0,5960.0,4092.0,64.911168
4,610,27590.0,41.4,18741.0,9266.0,12553.0,45.498369


In [9]:
# Save as a csv
# The encoding is set to UTF-8 explicitly to avoid issues when the file is read
# back later; index=False keeps the row index out of the file.
csv_path = "census_data.csv"
census_pd.to_csv(csv_path, index=False, encoding="utf-8")

In [None]:
!pip install uszipcode

In [None]:
from uszipcode import ZipcodeSearchEngine
search = ZipcodeSearchEngine()

# Sample postal codes: three valid ones, one the engine does not know (10) and a missing value.
# The frame is bound to `zcode` because every later statement refers to that name;
# previously only `zcode_df` was defined, so the cell failed with a NameError on a fresh kernel.
zcode = pd.DataFrame({'Postal_Code': [10006.0, 11415, 10037, 10, np.nan]})
zcode_df = zcode  # original name kept as an alias of the same frame


def zco(x):
    """Look up zipcode ``x`` with the search engine and return its 'City' field.

    In the recorded output of this cell, codes the engine does not recognise
    (10, and 0 for the missing value) come back as None.
    """
    return search.by_zipcode(x)['City']


# Missing codes are filled with 0 so the float column can be cast to int before the lookup.
zcode['City'] = zcode['Postal_Code'].fillna(0).astype(int).apply(zco)
print(zcode)
   Postal_Code         City
0      10006.0     New York
1      11415.0  Kew Gardens
2      10037.0     New York
3         10.0         None
4          NaN         None

In [None]:
# def zco():
#     for x in zcode['Postal_Code']:
#         x = int(x)                          #convert to int because value is float
#         city = search.by_zipcode(x)['City'] #Module extracts the city name 
#         if city == str(city):               # The module doesn't recognize some zipcodes and returns None for them; this check skips those None values.
#             str(x).replace(str(x), city)    #replace int value with city
#         else: continue

# zcode['Postal_Code'] = zcode['Postal_Code'].apply(zco())

# zcode['Postal_Code'].fillna(0).astype(int).astype(str).apply(zco)

# def zco(x):
#     city = search.by_zipcode(x)['City']
#     return city if city else x  # if city is None for certain zipcodes, fall back to x (relies on None being falsy)

# Alternatively, you could also use df.transform(callable, axis=1):

# zcode['Postal_Code'].fillna(0).astype(int).astype(str).transform(zco)

In [None]:
# def zco(x):
#     for x in zcode['Zipcode']:
#         x = int(x)
#         city = search.by_zipcode(x)['City']
#     return city if city else x

