# Phoenix Households by AMI Band & Building Size - Multifamily vs. Single Family - PUMS data
### specify year, import AMI bands in py file
#### explore shift in AMI distribution by building type

- https://www.census.gov/data/developers/data-sets/

-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
#from AMI_BANDS_2021 import *
from AMI_BANDS_2012 import *

#year = '2021'
year = '2012'

In [4]:
#parameters
sample = 'acs1'

phx_pumas = ['0400112','0400113','0400114','0400115','0400116','0400117',
             '0400118','0400119','0400120','0400121','0400122','0400123',
             '0400125','0400128','0400129']

data_cols = 'SERIALNO,ST,PUMA,HINCP,NP,WGTP,ADJINC,BLD'

In [5]:
bld_recode = {1.0:'sf',2.0:'sf',3.0:'sf',4.0:'mf',5.0:'mf',
             6.0:'mf',7.0:'mf',8.0:'mf',9.0:'mf',10.0:'oth',0.0:'na'}

In [6]:
# create a list of replicate weights
repwt = 'WGTP'
repwts = [repwt+str(i) for i in range(1, 81)]

## Get PUMA data

In [7]:
raw = get.get_puma(sample,year,data_cols)

In [8]:
df = raw.copy()

In [9]:
df['ST'] = df['ST'].str.zfill(2)
df['PUMA'] = df['PUMA'].str.zfill(5)
df['GEO_ID'] = df['ST']+df['PUMA']

In [10]:
#make geo identifiers, reduce df to PHX PUMAs
df['GEO_ID'] = df['ST']+df['PUMA']
df = df[df.GEO_ID.isin(phx_pumas)]
df  = df.drop(['SERIALNO','ST','PUMA'],axis=1)
df = df[['GEO_ID']+[col for col in df.columns if col !='GEO_ID']] #move id to first col

#clean data cols to be numeric
for col in df.columns[1:]: df[col] = df[col].astype(float)

In [11]:
df['HHSz'] = pd.cut(df['NP'],bins=[0,1,2,3,4,5,6,7,14],
                   labels=['1','2','3','4','5','6','7','8'])

df['BSize'] = df['BLD'].map(bld_recode)

In [12]:
df.BSize.unique()

array(['sf', 'mf', 'na', 'oth'], dtype=object)

In [13]:
df.BSize.value_counts()

sf     4715
mf     1598
na      484
oth       3
Name: BSize, dtype: int64

In [14]:
dff = df[~(df.HHSz.isna()) & (df.HINCP!=-60000)].copy()

#generate adjusted household income using ACS adjustment factor
if year == '2012': 
    df['HINCP'] = df.ADJINC/1000000 * df.HINCP
else:
    df['HINCP'] = df.ADJINC * df.HINCP

print(dff.WGTP.sum())

dff['HHSz'] = dff['HHSz'].astype(str)

570939.0


In [15]:
#assign AMI range based on household size and ami dictionaries
dff['AMI_range'] = np.where((dff['HINCP']<=dff['HHSz'].map(AMI_30pct)),inc_lbl[0],
                   np.where((dff['HINCP']>dff['HHSz'].map(AMI_30pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_50pct)),inc_lbl[1],
                   np.where((dff['HINCP']>dff['HHSz'].map(AMI_50pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_80pct)),
                   inc_lbl[2],np.where((dff['HINCP']>dff['HHSz'].map(AMI_80pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_100pct)),
                   inc_lbl[3],np.where((dff['HINCP']>dff['HHSz'].map(AMI_100pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_120pct)),
                                       inc_lbl[4],inc_lbl[5])))))

In [16]:
pivot = dff.drop(['HINCP','NP','HHSz','BLD'],axis=1).copy()
pivot = pivot.groupby(['GEO_ID','AMI_range','BSize']).sum().reset_index()
pivot.rename(columns={'WGTP':'hh_e'},inplace=True)

In [17]:
pivot['hh_SE'] = pivot.apply(lambda x: (calc.get_se(x['hh_e'],x[repwts])),axis=1)
pivot['hh_MOE'] = pivot.apply(lambda x: (calc.get_moe(x['hh_SE'])),axis=1)
pivot['hh_CV'] = pivot.apply(lambda x: (calc.get_cv(x['hh_e'],x['hh_SE'])),axis=1)
pivot = pivot.drop(columns=repwts)

In [18]:
pivot.head()

Unnamed: 0,GEO_ID,AMI_range,BSize,hh_e,ADJINC,hh_SE,hh_MOE,hh_CV
0,400112,100_120_ami,mf,1056.0,6061242.0,345.570796,568.463959,19.893317
1,400112,100_120_ami,sf,1873.0,20204140.0,321.834701,529.418084,10.445499
2,400112,30_50_ami,mf,514.0,5051035.0,174.635334,287.275125,20.653949
3,400112,30_50_ami,sf,2067.0,22224554.0,314.786078,517.823099,9.257829
4,400112,50_80_ami,mf,695.0,8081656.0,172.390037,283.581612,15.078615


In [19]:
final = pd.pivot_table(pivot,values=['hh_e','hh_MOE','hh_CV'],index='GEO_ID',\
                      columns=['AMI_range','BSize'],aggfunc=np.sum).reset_index()

In [20]:
final.head(10)

Unnamed: 0_level_0,GEO_ID,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,...,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e
AMI_range,Unnamed: 1_level_1,100_120_ami,100_120_ami,30_50_ami,30_50_ami,30_50_ami,50_80_ami,50_80_ami,50_80_ami,80_100_ami,...,50_80_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o120_ami,o120_ami,u30_ami,u30_ami,u30_ami
BSize,Unnamed: 1_level_2,mf,sf,mf,oth,sf,mf,oth,sf,mf,...,mf,oth,sf,mf,sf,mf,sf,mf,oth,sf
0,400112,19.893317,10.445499,20.653949,,9.257829,15.078615,,10.525689,18.353185,...,695.0,,3526.0,793.0,1175.0,2978.0,27850.0,507.0,,1656.0
1,400113,16.295471,8.812831,17.431359,,9.817885,13.494952,,7.912028,15.027946,...,2218.0,,4288.0,1856.0,2323.0,2936.0,17727.0,1271.0,,2644.0
2,400114,14.690879,8.690339,10.066231,,8.633907,9.65959,,7.890977,14.960645,...,3595.0,,4094.0,1502.0,3085.0,2420.0,15127.0,1823.0,,3437.0
3,400115,13.882778,9.836043,8.802918,,10.049706,8.964469,,5.902464,14.229432,...,3322.0,,6237.0,2121.0,2644.0,1077.0,9078.0,3107.0,,2100.0
4,400116,14.079471,9.960418,8.40579,,9.873888,7.130785,,6.471052,11.729424,...,3729.0,,5276.0,1715.0,1570.0,2300.0,9402.0,6400.0,,2798.0
5,400117,12.270286,12.223057,8.137259,,10.050395,9.388073,,7.73579,10.171267,...,3438.0,,3718.0,1978.0,2434.0,2689.0,10042.0,6902.0,,3300.0
6,400118,12.987172,13.535332,6.202375,,7.556188,9.417952,,7.155115,14.830238,...,2885.0,,3922.0,1757.0,1856.0,3497.0,6042.0,5644.0,41.0,4649.0
7,400119,46.946861,11.568009,10.879507,,7.887947,12.227601,,5.796055,34.128074,...,1425.0,,5713.0,194.0,3334.0,250.0,5133.0,1988.0,,5051.0
8,400120,16.096195,9.360189,18.412895,,11.11606,11.850665,,8.880952,17.782253,...,1968.0,,2676.0,1329.0,2370.0,2129.0,18421.0,1329.0,,1518.0
9,400121,36.970971,10.170335,14.847804,,9.708767,25.032907,,5.142719,35.342413,...,255.0,,8974.0,256.0,4960.0,292.0,6837.0,791.0,,7541.0


In [21]:
final.to_excel(f'../../output/acs/hhinc_bsize_120ami_pumas_{year[-2:]}.xlsx')