In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
from us import states
# Census & gmaps API Keys
from config import (api_key, gkey)
c = Census(api_key, year=2016)

# Configure gmaps
gmaps.configure(gkey)



In [2]:
census_data = c.acs5.get(("NAME","B01003_001E",
                          "B19001_017E",
                          "B19113_001E",
                          "B25002_002E"),{'for':'zip code tabulation area:*'})



In [None]:
# Convert to DataFrame
census_df = pd.DataFrame(census_data)

# Column Reordering
census_df = census_df.rename(columns={"B01003_001E": "Population",
                                     "B25002_002E": "Households",
                                     "B19113_001E": "Median family income",
                                     "B19001_017E":"Households with household income $200,000 or more",
                                     "NAME": "Name", "zip code tabulation area": "zipcode"})



In [4]:
#clean data
cleaned_census_df = census_df.replace(-666666666.0, "NA")

cleaned_census_df.head()


Unnamed: 0,Population,"Households with household income $200,000 or more",Median family income,Households,Name,zipcode
0,17423.0,146.0,82512.0,7190.0,ZCTA5 01001,1001
1,29970.0,722.0,94489.0,9561.0,ZCTA5 01002,1002
2,11296.0,0.0,,26.0,ZCTA5 01003,1003
3,5228.0,89.0,99127.0,1840.0,ZCTA5 01005,1005
4,14888.0,350.0,92100.0,5611.0,ZCTA5 01007,1007


In [16]:
#drop rows  that lack data#NOT REMOVING ROWS YET
cleaned_census_df_no_missing=cleaned_census_df.dropna()

income_df = cleaned_census_df_no_missing.copy()

# calculate % "rich"(over200k)

income_df["Percent of households with income over $200,000"] = income_df["Households with household income $200,000 or more"]/income_df["Households"]*100


income_df.head()

Unnamed: 0,Population,"Households with household income $200,000 or more",Median family income,Households,Name,zipcode,"Percent of households with income over $200,000"
0,17423.0,146.0,82512.0,7190.0,ZCTA5 01001,1001,2.030598
1,29970.0,722.0,94489.0,9561.0,ZCTA5 01002,1002,7.551511
2,11296.0,0.0,,26.0,ZCTA5 01003,1003,0.0
3,5228.0,89.0,99127.0,1840.0,ZCTA5 01005,1005,4.836957
4,14888.0,350.0,92100.0,5611.0,ZCTA5 01007,1007,6.237747


In [17]:
income_df.to_csv('incomebyzip.csv')

In [None]:
housing_cost_data = c.acs5.get(("NAME","B25001_001E",
                          'B25002_002E',
                          "B25003_002E",
                          "B25075_025E",
                          "B25075_026E",
                          "B25075_027E",
                          "B25077_001E",
                          "B25064_001E"),{'for':'zip code tabulation area:*'})


In [None]:
# Convert to DataFrame
housing_cost_data_df = pd.DataFrame(housing_cost_data)

# Column Reordering
housing_cost_data_df = housing_cost_data_df.rename(columns={"B25001_001E":"Total housing units",
                          'B25002_002E':"Occupied housing units (households)",
                          "B25003_002E":"Owner Occupied housing units",
                          "B25075_025E":"Housing units value $1,000,000 to $1,499,999 (owner occupied)",
                          "B25075_026E":"Housing units with value $1,500,000 to $1,999,999 (owner occupied)",
                          "B25075_027E":"Housing units with value $2,000,000 or more (owner occupied)",
                          "B25077_001E":" Median housing value ($) - owner occupied units",
                          "B25064_001E":"Median gross rent ($) - renter occupied units",
                          "NAME": "Name", "zip code tabulation area": "zipcode"})


In [None]:
housing_cost_data_df.head()


In [None]:
housing_cost_data_df.to_csv('housing.csv')