In [1]:
import math
import numpy as np
import pandas as pd
import json
import requests
import urllib

In [2]:
from censusAPI import myAPI

## User-defined parameters

In [3]:
#Variables and predicates for both start/end years
#Please refer to this website for the list of variables
#https://api.census.gov/data/2019/acs/acs5/profile/variables.html
dsource = 'acs/acs5/profile'
cols = f'GEO_ID,DP03_0062E,DP03_0062M,DP02_0016E,DP02_0016M'  
year = '2019'

### Utility Functions

In [4]:
def get_moe(m):
    result = math.sqrt(sum(map(lambda x: x**2, m)))
    return result

def get_cv(e, m): 
    if e == 0:
        return np.nan
    else:
        return np.absolute(m/1.645/e*100)
    
def get_pct(e,agg_e):
    if agg_e == 0:
        return np.nan
    else:
        return e/agg_e

def get_pctmoe(e,m,agg_e,agg_m): #check to make sure this doesn't break 
    if agg_e == 0:
        return np.nan
    else: 
        return (1/agg_e)*math.sqrt((m**2)-(((e/agg_e)**2)*(agg_m**2)))
    
def clean_data(df,var):
    dff = df[var].copy()
    var_num = var[1:]
    for col in var_num:
        dff[col] = dff[col].astype(float)
    dff = dff.replace([999999999, 555555555, 333333333, 222222222,\
                    666666666, 888888888, -999999999, -555555555,\
                    -333333333, -222222222, -666666666, -888888888], np.nan)
    return dff

## Median HHI and Average HH Size - All Zip Codes in U.S.

In [5]:
base_url = f'https://api.census.gov/data/{year}/{dsource}'

In [6]:
data_url = f'{base_url}?get={cols}&for=zip%20code%20tabulation%20area:*&key={myAPI}'
df = pd.read_json(data_url)
resp = requests.request('GET', data_url).content
df = pd.DataFrame(json.loads(resp)[1:])
df.columns = json.loads(resp)[0]
df.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0016E,DP02_0016M,state,zip code tabulation area
0,8600000US25245,57895,44070,2.04,0.43,54,25245
1,8600000US25268,27200,14584,2.72,0.53,54,25268
2,8600000US25286,38313,7178,2.77,0.36,54,25286
3,8600000US25303,58820,4344,2.28,0.15,54,25303
4,8600000US25311,40920,4217,2.06,0.08,54,25311


In [7]:
df.shape

(33120, 7)

In [8]:
dff = clean_data(df,list(df))
dff.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0016E,DP02_0016M,state,zip code tabulation area
0,8600000US25245,57895.0,44070.0,2.04,0.43,54.0,25245.0
1,8600000US25268,27200.0,14584.0,2.72,0.53,54.0,25268.0
2,8600000US25286,38313.0,7178.0,2.77,0.36,54.0,25286.0
3,8600000US25303,58820.0,4344.0,2.28,0.15,54.0,25303.0
4,8600000US25311,40920.0,4217.0,2.06,0.08,54.0,25311.0


In [9]:
dff['DP03_0062C'] = dff.apply(lambda x: (get_cv(x['DP03_0062E'],x['DP03_0062M'])),axis=1)
dff['DP02_0016C'] = dff.apply(lambda x: (get_cv(x['DP02_0016E'],x['DP02_0016M'])),axis=1)
dff.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0016E,DP02_0016M,state,zip code tabulation area,DP03_0062C,DP02_0016C
0,8600000US25245,57895.0,44070.0,2.04,0.43,54.0,25245.0,46.273899,12.813636
1,8600000US25268,27200.0,14584.0,2.72,0.53,54.0,25268.0,32.594314,11.845164
2,8600000US25286,38313.0,7178.0,2.77,0.36,54.0,25286.0,11.389152,7.900541
3,8600000US25303,58820.0,4344.0,2.28,0.15,54.0,25303.0,4.489509,3.99936
4,8600000US25311,40920.0,4217.0,2.06,0.08,54.0,25311.0,6.264726,2.360787


## Export to Excel

In [10]:
dff.to_excel('US_zips_inc.xlsx')