In [2]:
import math
import numpy as np
import pandas as pd
import json
import requests
import urllib

In [3]:
from censusAPI import myAPI

## User-defined parameters

In [4]:
#Variables and predicates for both start/end years
#Please refer to this website for the list of variables
#https://api.census.gov/data/2018/acs/acs5/profile/variables.html
dsource = 'acs/acs5/profile'
cols = f'GEO_ID,DP05_0014E,DP05_0015E,DP05_0016E,DP05_0017E,DP05_0033E,DP05_0036E,DP05_0038E,DP05_0070E,DP05_0071E,DP05_0077E,DP05_0078E'
year = '2018'

Age60p = ['DP05_0014E','DP05_0015E','DP05_0016E','DP05_0017E']

#PopTot = 'DP05_0033E'
#PopOneRace = 'DP05_0036E'
#BlackA = 'DP05_0038E'
#NotHisW = 'DP05_0077E'
#NotHisB = 'DP05_0078E'

### Utility Functions

In [5]:
def get_moe(m):
    result = math.sqrt(sum(map(lambda x: x**2, m)))
    return result

def get_cv(e, m): 
    if e == 0:
        return np.nan
    else:
        return np.absolute(m/1.645/e*100)
    
def get_pct(e,agg_e):
    if agg_e == 0:
        return np.nan
    else:
        return e/agg_e

def get_pctmoe(e,m,agg_e,agg_m): #check to make sure this doesn't break 
    if agg_e == 0:
        return np.nan
    else: 
        return (1/agg_e)*math.sqrt((m**2)-(((e/agg_e)**2)*(agg_m**2)))
    
def clean_data(df,var):
    dff = df[var].copy()
    var_num = var[1:]
    for col in var_num:
        dff[col] = dff[col].astype(float)
    dff = dff.replace([999999999, 555555555, 333333333, 222222222,\
                    666666666, 888888888, -999999999, -555555555,\
                    -333333333, -222222222, -666666666, -888888888], np.nan)
    return dff

## Median HHI and Average HH Size - All Zip Codes in U.S.

In [6]:
base_url = f'https://api.census.gov/data/{year}/{dsource}'

In [7]:
data_url = f'{base_url}?get={cols}&for=zip%20code%20tabulation%20area:*&key={myAPI}'
df = pd.read_json(data_url)
resp = requests.request('GET', data_url).content
df = pd.DataFrame(json.loads(resp)[1:])
df.columns = json.loads(resp)[0]
df.head()

Unnamed: 0,GEO_ID,DP05_0014E,DP05_0015E,DP05_0016E,DP05_0017E,DP05_0033E,DP05_0036E,DP05_0038E,DP05_0070E,DP05_0071E,DP05_0077E,DP05_0078E,zip code tabulation area
0,8600000US43964,745,916,522,188,8642,8505,256,8642,27,8209,256,43964
1,8600000US28216,2225,2958,1577,542,51116,49865,32929,51116,4077,12020,32646,28216
2,8600000US28277,4079,5063,1842,727,71605,69329,6346,71605,5172,47299,6154,28277
3,8600000US28278,1336,1593,533,163,27286,25546,8672,27286,2870,12758,8214,28278
4,8600000US28303,1673,2610,1406,688,29414,27693,11486,29414,3084,12090,11295,28303


In [8]:
df.shape

(33120, 13)

In [9]:
dff = clean_data(df,list(df))
dff.head()

Unnamed: 0,GEO_ID,DP05_0014E,DP05_0015E,DP05_0016E,DP05_0017E,DP05_0033E,DP05_0036E,DP05_0038E,DP05_0070E,DP05_0071E,DP05_0077E,DP05_0078E,zip code tabulation area
0,8600000US43964,745.0,916.0,522.0,188.0,8642.0,8505.0,256.0,8642.0,27.0,8209.0,256.0,43964.0
1,8600000US28216,2225.0,2958.0,1577.0,542.0,51116.0,49865.0,32929.0,51116.0,4077.0,12020.0,32646.0,28216.0
2,8600000US28277,4079.0,5063.0,1842.0,727.0,71605.0,69329.0,6346.0,71605.0,5172.0,47299.0,6154.0,28277.0
3,8600000US28278,1336.0,1593.0,533.0,163.0,27286.0,25546.0,8672.0,27286.0,2870.0,12758.0,8214.0,28278.0
4,8600000US28303,1673.0,2610.0,1406.0,688.0,29414.0,27693.0,11486.0,29414.0,3084.0,12090.0,11295.0,28303.0


In [10]:
dff['Pop60p'] = dff.loc[:,Age60p].sum(axis=1)
#dff['DP03_0062C'] = dff.apply(lambda x: (get_cv(x['DP03_0062E'],x['DP03_0062M'])),axis=1)
#dff['DP02_0015C'] = dff.apply(lambda x: (get_cv(x['DP02_0015E'],x['DP02_0015M'])),axis=1)
dff.head()

Unnamed: 0,GEO_ID,DP05_0014E,DP05_0015E,DP05_0016E,DP05_0017E,DP05_0033E,DP05_0036E,DP05_0038E,DP05_0070E,DP05_0071E,DP05_0077E,DP05_0078E,zip code tabulation area,Pop60p
0,8600000US43964,745.0,916.0,522.0,188.0,8642.0,8505.0,256.0,8642.0,27.0,8209.0,256.0,43964.0,2371.0
1,8600000US28216,2225.0,2958.0,1577.0,542.0,51116.0,49865.0,32929.0,51116.0,4077.0,12020.0,32646.0,28216.0,7302.0
2,8600000US28277,4079.0,5063.0,1842.0,727.0,71605.0,69329.0,6346.0,71605.0,5172.0,47299.0,6154.0,28277.0,11711.0
3,8600000US28278,1336.0,1593.0,533.0,163.0,27286.0,25546.0,8672.0,27286.0,2870.0,12758.0,8214.0,28278.0,3625.0
4,8600000US28303,1673.0,2610.0,1406.0,688.0,29414.0,27693.0,11486.0,29414.0,3084.0,12090.0,11295.0,28303.0,6377.0


## Export to Excel

In [11]:
dff.to_excel('COVID_US_zips_v2.xlsx')