In [8]:
import math
import numpy as np
import pandas as pd
import json
import requests
import urllib

In [2]:
from censusAPI import myAPI

## User-defined parameters

In [3]:
#Variables and predicates for both start/end years
#Please refer to this website for the list of variables
#https://api.census.gov/data/2018/acs/acs5/profile/variables.html
dsource = 'acs/acs5/profile'
cols = f'GEO_ID,DP03_0062E,DP03_0062M,DP02_0015E,DP02_0015M'  
year = '2018'

### Utility Functions

In [12]:
def get_moe(m):
    result = math.sqrt(sum(map(lambda x: x**2, m)))
    return result

def get_cv(e, m): 
    if e == 0:
        return np.nan
    else:
        return np.absolute(m/1.645/e*100)
    
def get_pct(e,agg_e):
    if agg_e == 0:
        return np.nan
    else:
        return e/agg_e

def get_pctmoe(e,m,agg_e,agg_m): #check to make sure this doesn't break 
    if agg_e == 0:
        return np.nan
    else: 
        return (1/agg_e)*math.sqrt((m**2)-(((e/agg_e)**2)*(agg_m**2)))
    
def clean_data(df,var):
    dff = df[var].copy()
    var_num = var[1:]
    for col in var_num:
        dff[col] = dff[col].astype(float)
    dff = dff.replace([999999999, 555555555, 333333333, 222222222,\
                    666666666, 888888888, -999999999, -555555555,\
                    -333333333, -222222222, -666666666, -888888888], np.nan)
    return dff

## Median HHI and Average HH Size - All Zip Codes in U.S.

In [4]:
base_url = f'https://api.census.gov/data/{year}/{dsource}'

In [6]:
data_url = f'{base_url}?get={cols}&for=zip%20code%20tabulation%20area:*&key={myAPI}'
df = pd.read_json(data_url)
resp = requests.request('GET', data_url).content
df = pd.DataFrame(json.loads(resp)[1:])
df.columns = json.loads(resp)[0]
df.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0015E,DP02_0015M,zip code tabulation area
0,8600000US43964,42826,3932,2.26,0.08,43964
1,8600000US28216,48647,2420,2.52,0.09,28216
2,8600000US28277,105885,2303,2.68,0.05,28277
3,8600000US28278,90282,6483,2.97,0.1,28278
4,8600000US28303,39896,2177,2.21,0.07,28303


In [7]:
df.shape

(33120, 6)

In [13]:
dff = clean_data(df,list(df))
dff.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0015E,DP02_0015M,zip code tabulation area
0,8600000US43964,42826.0,3932.0,2.26,0.08,43964.0
1,8600000US28216,48647.0,2420.0,2.52,0.09,28216.0
2,8600000US28277,105885.0,2303.0,2.68,0.05,28277.0
3,8600000US28278,90282.0,6483.0,2.97,0.1,28278.0
4,8600000US28303,39896.0,2177.0,2.21,0.07,28303.0


In [14]:
dff['DP03_0062C'] = dff.apply(lambda x: (get_cv(x['DP03_0062E'],x['DP03_0062M'])),axis=1)
dff['DP02_0015C'] = dff.apply(lambda x: (get_cv(x['DP02_0015E'],x['DP02_0015M'])),axis=1)
dff.head()

Unnamed: 0,GEO_ID,DP03_0062E,DP03_0062M,DP02_0015E,DP02_0015M,zip code tabulation area,DP03_0062C,DP02_0015C
0,8600000US43964,42826.0,3932.0,2.26,0.08,43964.0,5.581361,2.151868
1,8600000US28216,48647.0,2420.0,2.52,0.09,28216.0,3.024081,2.171081
2,8600000US28277,105885.0,2303.0,2.68,0.05,28277.0,1.322189,1.134147
3,8600000US28278,90282.0,6483.0,2.97,0.1,28278.0,4.365248,2.046811
4,8600000US28303,39896.0,2177.0,2.21,0.07,28303.0,3.317135,1.925484


## Export to Excel

In [15]:
dff.to_excel('full_US_zips.xlsx')