In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
from us import states
# Census & gmaps API Keys
from config import (api_key, gkey)
# Census API client created with year=2016; the data-pull cells call c.acs5.get on it.
c = Census(api_key, year=2016)

# Hand the Google Maps key to gmaps.
gmaps.configure(api_key=gkey)



In [2]:
# Variables requested from the ACS 5-year endpoint. The trailing comments give
# the column label each code receives when the result is turned into a DataFrame.
income_variables = (
    "NAME",
    "B01003_001E",  # Population
    "B19001_017E",  # Households with household income $200,000 or more
    "B19113_001E",  # Median family income
    "B25002_002E",  # Households
)

# Geography filter: the '*' wildcard asks for every zip code tabulation area.
all_zctas = {'for': 'zip code tabulation area:*'}

census_data = c.acs5.get(income_variables, all_zctas)



In [3]:
# Build a DataFrame from the API records and replace the Census variable codes
# with readable column labels. This is a rename only; column order is untouched.
census_df = pd.DataFrame(census_data).rename(
    columns={
        "NAME": "Name",
        "B01003_001E": "Population",
        "B19001_017E": "Households with household income $200,000 or more",
        "B19113_001E": "Median family income",
        "B25002_002E": "Households",
        "zip code tabulation area": "zipcode",
    }
)



In [9]:
# Show the five rows with the lowest "Median family income" to inspect the
# bottom of the raw distribution.
census_df.sort_values(by="Median family income").head(5)

Unnamed: 0,Population,"Households with household income $200,000 or more",Median family income,Households,Name,zipcode
11747,30.0,0.0,-666666666.0,21.0,ZCTA5 36865,36865
6237,382.0,0.0,-666666666.0,179.0,ZCTA5 20687,20687
6244,0.0,0.0,-666666666.0,0.0,ZCTA5 20701,20701
8553,80.0,0.0,-666666666.0,53.0,ZCTA5 27982,27982
8549,169.0,0.0,-666666666.0,108.0,ZCTA5 27978,27978


In [18]:
# Keep only rows whose median family income is positive. This drops the
# -666666666 placeholder rows that surface when the raw frame is sorted.
has_positive_income = census_df['Median family income'].gt(0)
cleaned_census_df = census_df.loc[has_positive_income]

# Re-check the low end of the distribution after filtering.
cleaned_census_df.sort_values(by="Median family income").head(5)


Unnamed: 0,Population,"Households with household income $200,000 or more",Median family income,Households,Name,zipcode
6857,5486.0,0.0,2499.0,31.0,ZCTA5 22904,22904
32590,89.0,0.0,2499.0,43.0,ZCTA5 98939,98939
29387,98.0,3.0,2499.0,54.0,ZCTA5 87064,87064
11901,1107.0,0.0,2499.0,577.0,ZCTA5 37228,37228
13452,321.0,0.0,2499.0,104.0,ZCTA5 42151,42151


In [27]:
# Work on a copy so cleaned_census_df itself is left unmodified.
income_df = cleaned_census_df.copy()

# Share of households with income of $200,000 or more, as a percentage (0-100).
# The denominator is masked to NaN wherever "Households" is not positive, so a
# row with zero households yields NaN rather than inf (non-zero count / 0).
households = income_df["Households"]
income_df["Percent of households with income over $200,000"] = (
    income_df["Households with household income $200,000 or more"]
    / households.where(households > 0)
    * 100
)

income_df.head()

Unnamed: 0,Population,"Households with household income $200,000 or more",Median family income,Households,Name,zipcode,"Percent of households with income over $200,000"
0,17423.0,146.0,82512.0,7190.0,ZCTA5 01001,1001,2.030598
1,29970.0,722.0,94489.0,9561.0,ZCTA5 01002,1002,7.551511
3,5228.0,89.0,99127.0,1840.0,ZCTA5 01005,1005,4.836957
4,14888.0,350.0,92100.0,5611.0,ZCTA5 01007,1007,6.237747
5,1194.0,24.0,72000.0,530.0,ZCTA5 01008,1008,4.528302


In [29]:
# String form of the ZCTA code, left-padded with zeros to five characters.
# astype(str) alone would turn an integer 1001 into "1001"; zfill keeps codes
# such as 01001 intact even if 'zipcode' ever holds numbers (e.g. after the
# frame has been reloaded from a CSV). Values that are already five-character
# strings pass through unchanged.
income_df['ZipCode'] = income_df['zipcode'].astype(str).str.zfill(5)

# Display column dtypes to confirm the new column is stored as object (string).
income_df.dtypes

Population                                           float64
Households with household income $200,000 or more    float64
Median family income                                 float64
Households                                           float64
Name                                                  object
zipcode                                               object
Percent of households with income over $200,000      float64
ZipCode                                               object
dtype: object

In [30]:
# Save the income table to disk. No index option is passed, so the DataFrame
# index is written as the first column of the file.
income_df.to_csv(path_or_buf='incomebyzip.csv')

In [None]:
# Housing variables requested from the ACS 5-year endpoint. The trailing
# comments give a short form of the label each code is renamed to afterwards.
housing_variables = (
    "NAME",
    "B25001_001E",  # Total housing units
    'B25002_002E',  # Occupied housing units (households)
    "B25003_002E",  # Owner occupied housing units
    "B25075_025E",  # Owner-occupied units valued $1,000,000 to $1,499,999
    "B25075_026E",  # Owner-occupied units valued $1,500,000 to $1,999,999
    "B25075_027E",  # Owner-occupied units valued $2,000,000 or more
    "B25077_001E",  # Median housing value ($), owner occupied units
    "B25064_001E",  # Median gross rent ($), renter occupied units
)

# Geography filter: the '*' wildcard asks for every zip code tabulation area.
every_zcta = {'for': 'zip code tabulation area:*'}

housing_cost_data = c.acs5.get(housing_variables, every_zcta)


In [None]:
# Build a DataFrame from the API records and replace the Census variable codes
# with readable column labels. This is a rename only; column order is untouched.
housing_cost_data_df = pd.DataFrame(housing_cost_data).rename(
    columns={
        "NAME": "Name",
        "B25001_001E": "Total housing units",
        'B25002_002E': "Occupied housing units (households)",
        "B25003_002E": "Owner Occupied housing units",
        "B25075_025E": "Housing units value $1,000,000 to $1,499,999 (owner occupied)",
        "B25075_026E": "Housing units with value $1,500,000 to $1,999,999 (owner occupied)",
        "B25075_027E": "Housing units with value $2,000,000 or more (owner occupied)",
        # This label must not start with a space; a stray leading blank makes
        # the column impossible to select by its visible name.
        "B25077_001E": "Median housing value ($) - owner occupied units",
        "B25064_001E": "Median gross rent ($) - renter occupied units",
        "zip code tabulation area": "zipcode",
    }
)


In [None]:
# Preview the first five rows of the renamed housing table.
housing_cost_data_df.head(n=5)


In [None]:
# Save the housing table to disk. No index option is passed, so the DataFrame
# index is written as the first column of the file.
housing_cost_data_df.to_csv(path_or_buf='housing.csv')