# PUMS Household Income vs. AMI (2021) in Phoenix PUMAs

- https://www.census.gov/data/developers/data-sets/

Households by income and household size, for comparison with the HUD AMI limits of the same year:
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
# Search parameters: survey year and ACS sample to query
y1 = '2021'
sample = 'acs1'

# State FIPS + PUMA codes of the Phoenix PUMAs kept for the analysis
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122', '0400123',
    '0400125', '0400128', '0400112', '0400129',
]

# North-Phoenix subset, currently unused: ['0400112', '0400129']

# Comma-separated PUMS variables requested from the API
data_cols = 'SERIALNO,ST,PUMA,HINCP,NP,WGTP,ADJINC'

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

HINCP - household income (HHI) in the past 12 months (excluded below: -60000 = n/a and 0 = no income; -59999 = loss of $59,999 or more)
-1 to -59998 = loss, 1+ = income
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

SMOCP - selected owner costs (monthly) (not '00000': none / '-1')
https://api.census.gov/data/2021/acs/acs1/pums/variables/SMOCP.json

In [4]:
# Income needed to afford the median household rent + utilities, keyed by PUMA
# (3-bedroom multifamily unit)
inc_needed_mf = {
    '0400112': 132198,
    '0400113': 96351,
    '0400114': 76625,
    '0400115': 80894,
    '0400116': 56277,
    '0400117': 90218,
    '0400118': 72771,
    '0400119': 68503,
    '0400120': 93160,
    '0400121': 61416,
    '0400122': 57977,
    '0400123': 78324,
    '0400125': 73724,
    '0400128': 70450,
    '0400129': 88726,
}

# Income needed to afford the median household sales price, keyed by PUMA
# (3-bedroom single-family unit)
inc_needed_sf = {
    '0400112': 124324,
    '0400113': 103396,
    '0400114': 82883,
    '0400115': 77599,
    '0400116': 82883,
    '0400117': 107748,
    '0400118': 82883,
    '0400119': 80811,
    '0400120': 82883,
    '0400121': 80811,
    '0400122': 65146,
    '0400123': 72523,
    '0400125': 70450,
    '0400128': 80811,
    '0400129': 91171,
}

In [5]:
# 2021 AMI income limits keyed by household size ('1'..'8', as strings so they
# line up with the HHSz labels used for the lookup).
AMI_30pct = {
    '1': 16600, '2': 19000, '3': 21960, '4': 26500,
    '5': 31040, '6': 35580, '7': 40120, '8': 44660,
}
# The 3-person limit was entered as 35500; 50% of the 3-person 100% figure
# (71100) and 90% of the 4-person 50% limit (39500) both give 35550.
AMI_50pct = {
    '1': 27650, '2': 31600, '3': 35550, '4': 39500,
    '5': 42700, '6': 45850, '7': 49000, '8': 52150,
}
AMI_80pct = {
    '1': 44250, '2': 50600, '3': 56900, '4': 63200,
    '5': 68300, '6': 73350, '7': 78400, '8': 83450,
}
AMI_100pct = {
    '1': 55300, '2': 63200, '3': 71100, '4': 79000,
    '5': 85400, '6': 91700, '7': 98000, '8': 104300,
}

In [6]:
# Housing-cost break points affordable to each AMI band, keyed by household size.
# Each list holds the band edges; the last entry is an open-ended upper cap.
# NOTE(review): the '2' row repeats 1535 from the '1' row as its fifth edge while
# every other row increases with household size - looks like a copy/paste slip;
# confirm against the source table before using this lookup.
_NO_UPPER_LIMIT = 1_000_000_000
unit_afford = {
    '1': [0, 461, 767, 1217, 1535, _NO_UPPER_LIMIT],
    '2': [0, 527, 877, 1392, 1535, _NO_UPPER_LIMIT],
    '3': [0, 609, 985, 1565, 1973, _NO_UPPER_LIMIT],
    '4': [0, 735, 1096, 1738, 2192, _NO_UPPER_LIMIT],
    '5': [0, 861, 1185, 1878, 2370, _NO_UPPER_LIMIT],
    '6': [0, 987, 1272, 2017, 2545, _NO_UPPER_LIMIT],
    '7': [0, 1113, 1360, 2156, 2720, _NO_UPPER_LIMIT],
    '8': [0, 1239, 1447, 2295, 2894, _NO_UPPER_LIMIT],
}

# Labels for the five AMI bands, lowest to highest
inc_lbls = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami', 'o100_ami']

In [7]:
# Names of the 80 replicate-weight columns: WGTP1 ... WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{n}' for n in range(1, 81)]

## Get PUMA data

In [8]:
# Pull the requested columns for the chosen sample/year through the project's
# get_pums helper (presumably from the Census PUMS API - confirm in get_pums.py).
df = get.get_puma(sample,y1,data_cols)

In [9]:
# Build the PUMA identifier (state code + PUMA code) and keep Phoenix rows only
df['GEO_ID'] = df['ST'] + df['PUMA']
in_phoenix = df['GEO_ID'].isin(phx_pumas)
phx = df.loc[in_phoenix].drop(columns=['SERIALNO', 'ST', 'PUMA'])

# GEO_ID goes first; every remaining column is cast to float
value_cols = [name for name in phx.columns if name != 'GEO_ID']
df = phx[['GEO_ID'] + value_cols].astype({name: float for name in value_cols})

In [10]:
# Bucket household size (NP) into the keys used by the AMI lookup tables.
# The top bucket '8' is open-ended: the previous upper edge of 14 left any larger
# household outside every bin (confirm the NP maximum in the PUMS data dictionary).
hh_size_edges = [0, 1, 2, 3, 4, 5, 6, 7, np.inf]
hh_size_labels = ['1', '2', '3', '4', '5', '6', '7', '8']
# astype(object) keeps the labels as strings but leaves unbinned rows (e.g. NP == 0)
# as real NaN; astype(str) turned them into the string 'nan', which isna() cannot
# detect and which silently fell through to the highest AMI band.
df['HHSz'] = pd.cut(df['NP'], bins=hh_size_edges, labels=hh_size_labels).astype(object)


In [11]:
# Attach each household's own-PUMA income thresholds (single-family, then multifamily)
for threshold_col, lookup in (('inc_needed_sf', inc_needed_sf),
                              ('inc_needed_mf', inc_needed_mf)):
    df[threshold_col] = df['GEO_ID'].map(lookup)

In [12]:
# Keep households with a size bucket and a usable income code
# (-60000 and 0 are dropped), then apply the income adjustment factor.
has_size = df['HHSz'].notna()
income_ok = df['HINCP'].ne(-60000) & df['HINCP'].ne(0)
dff = df.loc[has_size & income_ok].copy()
dff['HINCP'] = dff['ADJINC'] * dff['HINCP']

In [13]:
# Label each household by whether its income meets its own PUMA's threshold
for unit_kind in ('sf', 'mf'):
    meets_threshold = dff['HINCP'] >= dff[f'inc_needed_{unit_kind}']
    dff[f'can_rent_{unit_kind}'] = np.where(meets_threshold, 'can rent', 'cannot afford')

In [14]:
# Assign each household to an AMI band using the limits for its household size
hh_income = dff['HINCP']
limit_30 = dff['HHSz'].map(AMI_30pct)
limit_50 = dff['HHSz'].map(AMI_50pct)
limit_80 = dff['HHSz'].map(AMI_80pct)
limit_100 = dff['HHSz'].map(AMI_100pct)

band_conditions = [
    hh_income <= limit_30,
    (hh_income > limit_30) & (hh_income <= limit_50),
    (hh_income > limit_50) & (hh_income <= limit_80),
    (hh_income > limit_80) & (hh_income <= limit_100),
]
band_labels = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami']

# Rows matching none of the conditions land in the top band
dff['AMI_range'] = np.select(band_conditions, band_labels, default='o100_ami')

In [16]:
# For every PUMA's single-family threshold, flag each household as able / unable
# to afford it (1/0) and add the household-weighted version of each flag.
# The columns are assembled in a dict and attached with a single concat: inserting
# them one at a time fragments the frame and triggers pandas' PerformanceWarning.
sf_cols = []
sf_flags = {}
hh_weight = dff['WGTP'].to_numpy()
for k, income_needed in inc_needed_sf.items():
    can_afford = np.where(dff.HINCP >= income_needed, 1, 0)
    cannot_afford = np.where(dff.HINCP < income_needed, 1, 0)
    sf_flags[f'aff_{k}_can_sf'] = can_afford
    sf_flags[f'aff_{k}_cant_sf'] = cannot_afford
    sf_flags[f'aff_{k}_can_sf_wt'] = can_afford * hh_weight
    sf_flags[f'aff_{k}_cant_sf_wt'] = cannot_afford * hh_weight
sf_cols.extend(sf_flags)

# Dropping any earlier copies first keeps the cell safe to re-run
dff = pd.concat(
    [dff.drop(columns=sf_cols, errors='ignore'),
     pd.DataFrame(sf_flags, index=dff.index)],
    axis=1,
)

In [17]:
# For every PUMA's multifamily threshold, flag each household as able / unable
# to afford it (1/0) and add the household-weighted version of each flag.
# The columns are assembled in a dict and attached with a single concat: inserting
# them one at a time fragments the frame and triggers pandas' PerformanceWarning.
mf_cols = []
mf_flags = {}
hh_weight = dff['WGTP'].to_numpy()
for k, income_needed in inc_needed_mf.items():
    can_afford = np.where(dff.HINCP >= income_needed, 1, 0)
    cannot_afford = np.where(dff.HINCP < income_needed, 1, 0)
    mf_flags[f'aff_{k}_can_mf'] = can_afford
    mf_flags[f'aff_{k}_cant_mf'] = cannot_afford
    mf_flags[f'aff_{k}_can_mf_wt'] = can_afford * hh_weight
    mf_flags[f'aff_{k}_cant_mf_wt'] = cannot_afford * hh_weight
mf_cols.extend(mf_flags)

# Dropping any earlier copies first keeps the cell safe to re-run
dff = pd.concat(
    [dff.drop(columns=mf_cols, errors='ignore'),
     pd.DataFrame(mf_flags, index=dff.index)],
    axis=1,
)

In [18]:
# Preview the first three households with the derived flag columns
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,aff_0400125_can_mf_wt,aff_0400125_cant_mf_wt,aff_0400128_can_mf,aff_0400128_cant_mf,aff_0400128_can_mf_wt,aff_0400128_cant_mf_wt,aff_0400129_can_mf,aff_0400129_cant_mf,aff_0400129_can_mf_wt,aff_0400129_cant_mf_wt
3601,400122,297649.192,2.0,72.0,1.029928,69.0,19.0,68.0,74.0,21.0,...,72.0,0.0,1,0,72.0,0.0,1,0,72.0,0.0
3602,400112,76214.672,2.0,55.0,1.029928,56.0,96.0,91.0,53.0,16.0,...,55.0,0.0,1,0,55.0,0.0,0,1,0.0,55.0
3606,400119,40476.1704,3.0,50.0,1.029928,51.0,87.0,16.0,92.0,87.0,...,0.0,50.0,0,1,0.0,50.0,0,1,0.0,50.0


### table by PUMA for renters by AMI range - can afford/can't afford median rent

In [19]:
def make_est(df):
    """Add reliability columns for the household estimate and rename it to ``hh``.

    For every row, the ``WGTP`` value and the row's replicate-weight columns
    (the module-level ``repwts`` list) are passed to the ``calc`` helpers to fill
    ``hh_SE``, ``hh_MOE`` and ``hh_CV`` (presumably standard error, margin of error
    and coefficient of variation - confirm in pums.py).

    The frame is modified in place (three new columns, ``WGTP`` renamed to ``hh``)
    and the same object is returned.
    """
    def _std_error(row):
        return calc.get_se(row['WGTP'], row[repwts])

    def _margin(row):
        return calc.get_moe(row['hh_SE'])

    def _coef_var(row):
        return calc.get_cv(row['WGTP'], row['hh_SE'])

    # Order matters: the MOE and CV helpers read the hh_SE column written first
    for out_col, row_fn in (('hh_SE', _std_error), ('hh_MOE', _margin), ('hh_CV', _coef_var)):
        df[out_col] = df.apply(row_fn, axis=1)

    df.rename(columns={'WGTP': 'hh'}, inplace=True)
    return df

In [20]:
# Household-level inputs that must not be summed in the grouped tables
drop_cols = [
    'HINCP',
    'NP',
    'ADJINC',
    'HHSz',
    'inc_needed_sf',
    'inc_needed_mf',
]

In [21]:
# Base frame for all summary tables; drop() already returns a new frame
table = dff.drop(columns=drop_cols)

In [22]:
# Households per PUMA, split by AMI band and by the two affordability labels
ami_group_keys = ['GEO_ID', 'AMI_range', 'can_rent_sf', 'can_rent_mf']
table_2 = (
    table.drop(columns=sf_cols + mf_cols)
    .groupby(ami_group_keys)
    .sum()
    .reset_index()
)
table_2 = make_est(table_2)
table_2 = table_2.drop(columns=repwts)
# After the groupby each key combination is a single row, so 'sum' only places
# values into the wide layout (no MOE/CV values are actually added together).
# The string 'sum' replaces np.sum, which newer pandas flags with a FutureWarning.
table_2 = pd.pivot_table(
    table_2,
    values=['hh', 'hh_MOE', 'hh_CV'],
    index='GEO_ID',
    columns=['AMI_range', 'can_rent_sf', 'can_rent_mf'],
    aggfunc='sum',
).reset_index()

In [23]:
# Households per PUMA, split by the two affordability labels only
afford_group_keys = ['GEO_ID', 'can_rent_sf', 'can_rent_mf']
table_3 = (
    table.drop(columns=sf_cols + mf_cols + ['AMI_range'])
    .groupby(afford_group_keys)
    .sum()
    .reset_index()
)
table_3 = make_est(table_3)
table_3 = table_3.drop(columns=repwts)
# After the groupby each key combination is a single row, so 'sum' only places
# values into the wide layout (no MOE/CV values are actually added together).
# The string 'sum' replaces np.sum, which newer pandas flags with a FutureWarning.
table_3 = pd.pivot_table(
    table_3,
    values=['hh', 'hh_MOE', 'hh_CV'],
    index='GEO_ID',
    columns=['can_rent_sf', 'can_rent_mf'],
    aggfunc='sum',
).reset_index()

In [24]:
# Per-PUMA totals of the single-family affordability flags (raw and weighted)
label_cols = ['AMI_range', 'can_rent_sf', 'can_rent_mf']
sf_totals = (
    table.drop(columns=label_cols + mf_cols)
    .groupby(['GEO_ID'])
    .sum()
    .reset_index()
)
table_sf = make_est(sf_totals).drop(columns=repwts)

In [25]:
# Per-PUMA totals of the multifamily affordability flags (raw and weighted)
label_cols = ['AMI_range', 'can_rent_sf', 'can_rent_mf']
mf_totals = (
    table.drop(columns=label_cols + sf_cols)
    .groupby(['GEO_ID'])
    .sum()
    .reset_index()
)
table_mf = make_est(mf_totals).drop(columns=repwts)

In [27]:
# Preview the per-PUMA single-family summary
table_sf.head()

Unnamed: 0,GEO_ID,hh,aff_0400112_can_sf,aff_0400112_cant_sf,aff_0400112_can_sf_wt,aff_0400112_cant_sf_wt,aff_0400113_can_sf,aff_0400113_cant_sf,aff_0400113_can_sf_wt,aff_0400113_cant_sf_wt,...,aff_0400128_cant_sf,aff_0400128_can_sf_wt,aff_0400128_cant_sf_wt,aff_0400129_can_sf,aff_0400129_cant_sf,aff_0400129_can_sf_wt,aff_0400129_cant_sf_wt,hh_SE,hh_MOE,hh_CV
0,400112,57425.0,295,336,26156.0,31269.0,344,287,31675.0,25750.0,...,211,39028.0,18397.0,382,249,35454.0,21971.0,1356.414391,2231.301673,1.435904
1,400113,42824.0,189,307,15151.0,27673.0,236,260,19054.0,23770.0,...,211,23618.0,19206.0,262,234,21188.0,21636.0,1251.366213,2058.497421,1.776361
2,400114,46724.0,115,369,10535.0,36189.0,144,340,13843.0,32881.0,...,287,18642.0,28082.0,166,318,15375.0,31349.0,1310.525143,2155.81386,1.705059
3,400115,43853.0,61,333,6596.0,37257.0,94,300,10085.0,33768.0,...,249,14826.0,29027.0,122,272,12830.0,31023.0,1561.368302,2568.450857,2.164413
4,400116,50270.0,123,430,9189.0,41081.0,154,399,11639.0,38631.0,...,332,17854.0,32416.0,190,363,14747.0,35523.0,1216.228371,2000.69567,1.470755


In [28]:
# Write the four summary tables to one workbook, one sheet per table.
# The target folder is created first so a fresh checkout does not fail on export.
os.makedirs('output', exist_ok=True)
with pd.ExcelWriter('output/pums_rent_gap_marketdata.xlsx') as writer:
    table_2.to_excel(writer, sheet_name='can_cannot_withinpuma_ami.xlsx')
    table_3.to_excel(writer, sheet_name='can_cannot_withinpuma.xlsx')
    table_sf.to_excel(writer, sheet_name='can_cannot_all_sf.xlsx')
    table_mf.to_excel(writer, sheet_name='can_cannot_all_mf.xlsx')