# PUMS Household Income vs. AMI (2021) in Phoenix PUMAs

- https://www.census.gov/data/developers/data-sets/

For households by income and household size, to compare to HUD AMI in the same year:
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc
from AMI_BANDS_2021 import *

In [3]:
# Search parameters
y1 = '2021'      # survey year passed to get.get_puma
sample = 'acs1'  # sample identifier passed to get.get_puma

# GEO_IDs (ST code + PUMA code) of the PUMAs kept for this analysis
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122',
    '0400123', '0400125', '0400128', '0400112', '0400129',
]

# PUMS variables to request, sent as one comma-separated string
data_cols = ','.join(['SERIALNO', 'ST', 'PUMA', 'HINCP', 'NP', 'WGTP', 'ADJINC'])

In [4]:
# Inflation factor: treats $1 of July 2021 household income as equivalent to
# $1.11 when estimating a 2023 income equivalent.
# NOTE(review): the only visible use of this factor is commented out where
# HINCP is adjusted, so the results in this notebook are NOT inflated --
# confirm that is intended.
inflation_adjust = 1.11

In [5]:
# Income needed to afford the median household sales price in each PUMA,
# conventional loan. Keys are PUMA GEO_IDs; insertion order is kept because
# later cells iterate over this dict to build columns.
inc_needed = {
    '0400112': 176800,
    '0400113': 131700,
    '0400114': 89000,
    '0400115': 82900,
    '0400116': 87500,
    '0400117': 115800,
    '0400118': 82700,
    '0400119': 79300,
    '0400120': 109700,
    '0400121': 84600,
    '0400122': 61000,
    '0400123': 68300,
    '0400125': 73200,
    '0400128': 90200,
    '0400129': 126300,
}

In [6]:
# Income to afford median household sales price by PUMA, FHA loan.
# NOTE(review): this comment previously said "conventional loan", apparently
# copied from the inc_needed cell; "FHA" is taken from the variable name --
# confirm against the source of these figures.
inc_needed_fha = {'0400112':195470,'0400113':145650,'0400114':98410,'0400115':91670,
              '0400116':96710,'0400117':128070,'0400118':91400,'0400119':87630,
              '0400120':121330,'0400121':93560,'0400122':67400,'0400123':75490,
              '0400125':80890,'0400128':99760,'0400129':139660}

In [7]:
# Names of the 80 replicate-weight columns: WGTP1 ... WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{rep_no}' for rep_no in range(1, 81)]

## Get PUMA data

In [8]:
# Fetch the PUMS records for the chosen sample, year and variables through the
# project helper module.
# NOTE(review): the previews further down show WGTP1..WGTP80 columns that are
# not listed in data_cols, so get_puma presumably adds the replicate weights
# itself -- confirm in get_pums.
df = get.get_puma(sample,y1,data_cols)

In [9]:
# Build GEO_ID (ST + PUMA), keep only the PUMAs in phx_pumas, and drop the
# identifier columns that are no longer needed.
df = (
    df.assign(GEO_ID=df['ST'] + df['PUMA'])
      .loc[lambda frame: frame['GEO_ID'].isin(phx_pumas)]
      .drop(columns=['SERIALNO', 'ST', 'PUMA'])
)

# Put GEO_ID first, then cast every remaining column to float.
value_cols = [name for name in df.columns if name != 'GEO_ID']
df = df[['GEO_ID'] + value_cols]
df = df.astype({name: float for name in value_cols})

In [10]:
# Bucket household size (NP) into the labels '1'..'8'; '8' covers every
# household with more than 7 persons.
# - The top bin is open-ended (np.inf). With the former upper edge of 14, any
#   household with NP > 14 got no bucket, found no AMI limit, and fell through
#   to the highest AMI band regardless of income.
# - The labels are stored as plain objects, not via astype(str). astype(str)
#   turned unbucketed rows (e.g. NP == 0) into the literal string 'nan', so the
#   later `HHSz.isna()` filter could never exclude them.
hh_size_bins = [0, 1, 2, 3, 4, 5, 6, 7, np.inf]
hh_size_labels = ['1', '2', '3', '4', '5', '6', '7', '8']
df['HHSz'] = pd.cut(df['NP'], bins=hh_size_bins, labels=hh_size_labels).astype(object)

In [11]:
# Attach each household's PUMA-level income thresholds (conventional and FHA).
for threshold_col, threshold_by_puma in (
    ('inc_needed', inc_needed),
    ('inc_needed_fha', inc_needed_fha),
):
    df[threshold_col] = df['GEO_ID'].map(threshold_by_puma)

In [12]:
# Keep rows that have a household-size bucket and whose HINCP is neither
# -60000 nor 0.
# NOTE(review): -60000 looks like the not-applicable code for HINCP -- confirm
# against the PUMS variable list.
has_size_bucket = df['HHSz'].notna()
income_usable = ~df['HINCP'].isin([-60000, 0])
dff = df.loc[has_size_bucket & income_usable].copy()

# Apply the ADJINC adjustment factor to household income.
# (inflation_adjust is deliberately not applied here.)
dff['HINCP'] = dff['ADJINC'] * dff['HINCP']

In [13]:
# Label each household by whether its income reaches the threshold of its own
# PUMA, once for the conventional threshold and once for the FHA threshold.
for flag_col, threshold_col, yes_label, no_label in (
    ('can_buy', 'inc_needed', 'can buy', 'cannot afford'),
    ('can_buy_fha', 'inc_needed_fha', 'can buy fha', 'cannot afford fha'),
):
    meets_threshold = dff['HINCP'] >= dff[threshold_col]
    dff[flag_col] = np.where(meets_threshold, yes_label, no_label)

In [14]:
# Assign an AMI band from household income and the AMI limits that apply to
# the household's size bucket.
hh_income = dff['HINCP']

# Look up each limit once per row instead of re-mapping inside every comparison.
limit_30 = dff['HHSz'].map(AMI_30pct)
limit_50 = dff['HHSz'].map(AMI_50pct)
limit_80 = dff['HHSz'].map(AMI_80pct)
limit_100 = dff['HHSz'].map(AMI_100pct)
limit_120 = dff['HHSz'].map(AMI_120pct)

# First matching test wins; rows matching none (including rows without a
# limit) receive the last label, exactly as the nested np.where chain did.
band_tests = [
    hh_income <= limit_30,
    (hh_income > limit_30) & (hh_income <= limit_50),
    (hh_income > limit_50) & (hh_income <= limit_80),
    (hh_income > limit_80) & (hh_income <= limit_100),
    (hh_income > limit_100) & (hh_income <= limit_120),
]
band_labels = [inc_lbl[pos] for pos in range(5)]
dff['AMI_range'] = np.select(band_tests, band_labels, default=inc_lbl[5])

In [15]:
# Independent copy of dff for the FHA tabulations. It is taken here, before
# the per-PUMA conventional affordability columns are added to dff, so df_fha
# does not carry those columns.
df_fha = dff.copy()

In [17]:
# For every PUMA's conventional-loan income threshold, flag each household as
# able (1) or unable (0) to reach it. Also store the same flags multiplied by
# the household weight, so that summing them yields weighted household counts.
own_cols = []
for puma_id, needed_income in inc_needed.items():
    can_col = f'aff_{puma_id}_can'
    cant_col = f'aff_{puma_id}_cant'
    can_wt_col = f'{can_col}_wt'
    cant_wt_col = f'{cant_col}_wt'

    dff[can_col] = np.where(dff['HINCP'] >= needed_income, 1, 0)
    dff[cant_col] = np.where(dff['HINCP'] < needed_income, 1, 0)
    dff[can_wt_col] = dff[can_col] * dff['WGTP']
    dff[cant_wt_col] = dff[cant_col] * dff['WGTP']

    own_cols.extend([can_col, cant_col, can_wt_col, cant_wt_col])

In [18]:
# Preview the first three rows of dff, including the new aff_* columns.
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,aff_0400125_can_wt,aff_0400125_cant_wt,aff_0400128_can,aff_0400128_cant,aff_0400128_can_wt,aff_0400128_cant_wt,aff_0400129_can,aff_0400129_cant,aff_0400129_can_wt,aff_0400129_cant_wt
0,400120,152944.308,4.0,43.0,1.029928,12.0,76.0,46.0,79.0,48.0,...,43.0,0.0,1,0,43.0,0.0,1,0,43.0,0.0
1,400119,31927.768,2.0,161.0,1.029928,174.0,49.0,180.0,167.0,56.0,...,0.0,161.0,0,1,0.0,161.0,0,1,0.0,161.0
4,400114,30897.84,1.0,111.0,1.029928,32.0,127.0,31.0,96.0,196.0,...,0.0,111.0,0,1,0.0,111.0,0,1,0.0,111.0


In [19]:
# For every PUMA's FHA income threshold, flag each household as able (1) or
# unable (0) to reach it. Also store the same flags multiplied by the
# household weight, so that summing them yields weighted household counts.
own_cols_fha = []
for puma_id, needed_income in inc_needed_fha.items():
    can_col = f'aff_{puma_id}_fha_can'
    cant_col = f'aff_{puma_id}_fha_cant'
    can_wt_col = f'{can_col}_wt'
    cant_wt_col = f'{cant_col}_wt'

    df_fha[can_col] = np.where(df_fha['HINCP'] >= needed_income, 1, 0)
    df_fha[cant_col] = np.where(df_fha['HINCP'] < needed_income, 1, 0)
    df_fha[can_wt_col] = df_fha[can_col] * df_fha['WGTP']
    df_fha[cant_wt_col] = df_fha[cant_col] * df_fha['WGTP']

    own_cols_fha.extend([can_col, cant_col, can_wt_col, cant_wt_col])

In [20]:
# Preview the first three rows of df_fha, including the new aff_*_fha_* columns.
df_fha.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,aff_0400125_fha_can_wt,aff_0400125_fha_cant_wt,aff_0400128_fha_can,aff_0400128_fha_cant,aff_0400128_fha_can_wt,aff_0400128_fha_cant_wt,aff_0400129_fha_can,aff_0400129_fha_cant,aff_0400129_fha_can_wt,aff_0400129_fha_cant_wt
0,400120,152944.308,4.0,43.0,1.029928,12.0,76.0,46.0,79.0,48.0,...,43.0,0.0,1,0,43.0,0.0,1,0,43.0,0.0
1,400119,31927.768,2.0,161.0,1.029928,174.0,49.0,180.0,167.0,56.0,...,0.0,161.0,0,1,0.0,161.0,0,1,0.0,161.0
4,400114,30897.84,1.0,111.0,1.029928,32.0,127.0,31.0,96.0,196.0,...,0.0,111.0,0,1,0.0,111.0,0,1,0.0,111.0


### Table by PUMA of households by AMI range - can buy vs. cannot afford, with conventional loan terms

In [21]:
def make_est(df):
    """Add standard error, margin of error and CV columns to a table of weighted counts.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per estimate. Must contain ``WGTP`` (the full-sample weight
        total) and every replicate-weight column named in the module-level
        ``repwts`` list.

    Returns
    -------
    pandas.DataFrame
        A new frame with three added columns and ``WGTP`` renamed to ``hh``:
        ``hh_SE``  from ``calc.get_se(WGTP, replicate weights)``,
        ``hh_MOE`` from ``calc.get_moe(hh_SE)``,
        ``hh_CV``  from ``calc.get_cv(WGTP, hh_SE)``.

    Notes
    -----
    The input frame is not modified. The previous version added the columns
    and renamed ``WGTP`` in place, so the caller's frame changed silently and a
    second call on the same frame raised ``KeyError`` because ``WGTP`` was gone.
    """
    est = df.copy()
    # Row-wise because the calc helpers take one estimate and its replicate
    # weights at a time.
    est['hh_SE'] = est.apply(lambda row: calc.get_se(row['WGTP'], row[repwts]), axis=1)
    est['hh_MOE'] = est.apply(lambda row: calc.get_moe(row['hh_SE']), axis=1)
    est['hh_CV'] = est.apply(lambda row: calc.get_cv(row['WGTP'], row['hh_SE']), axis=1)
    return est.rename(columns={'WGTP': 'hh'})

In [22]:
# Household-level columns removed before the weights are aggregated.
drop_cols = [
    'HINCP',
    'NP',
    'ADJINC',
    'HHSz',
    'inc_needed',
    'inc_needed_fha',
]

In [23]:
# Working table for the conventional-loan tabulations: a copy of dff without
# the columns listed in drop_cols.
table = dff.copy()
table = table.drop(columns=drop_cols)

In [24]:
# Weighted household counts per PUMA x AMI band x conventional affordability flag.
# numeric_only=True restricts the sum to the numeric weight columns. Without
# it, whether the leftover string column (can_buy_fha) is dropped or
# concatenated row by row depends on the pandas version.
buy_within = (
    table.drop(columns=own_cols)
         .groupby(['GEO_ID', 'AMI_range', 'can_buy'])
         .sum(numeric_only=True)
         .reset_index()
)
buy_within = make_est(buy_within)
buy_within = buy_within.drop(columns=repwts)

# One row per PUMA and one column per (measure, AMI band, flag). Each
# combination occurs once after the groupby, so 'sum' only reshapes.
# The string 'sum' replaces np.sum, which is deprecated as an aggfunc.
buy_within = pd.pivot_table(buy_within, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                            columns=['AMI_range', 'can_buy'], aggfunc='sum').reset_index()

In [25]:
# Preview the first three PUMAs of the pivoted conventional-loan table.
buy_within.head(3)

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh,hh,hh,hh,...,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
AMI_range,Unnamed: 1_level_1,100_120_ami,100_120_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o120_ami,o120_ami,...,100_120_ami,100_120_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o120_ami,o120_ami,u30_ami
can_buy,Unnamed: 1_level_2,can buy,cannot afford,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,...,can buy,cannot afford,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,cannot afford
0,400112,,2300.0,3262.0,,4626.0,,3816.0,18362.0,22477.0,...,,557.330576,782.53486,,1039.722507,,729.046806,1677.335283,1644.401337,595.577697
1,400113,,3764.0,4174.0,,4887.0,,3302.0,14687.0,9559.0,...,,747.834713,864.231714,,937.429486,,756.056478,1445.396149,1297.551506,795.313312
2,400114,,4214.0,4863.0,,8128.0,,4446.0,16102.0,3915.0,...,,909.957849,1021.116078,,1276.757109,,776.523378,1542.505873,938.557774,1060.600658


In [26]:
# PUMA-level totals: household weight plus the per-threshold affordability
# counts (aff_* columns), with SE / MOE / CV for the household total.
# numeric_only=True keeps the leftover string flag column (can_buy_fha) out of
# the sum; otherwise some pandas versions concatenate it into one long string
# per PUMA and carry it into the exported sheet.
buy_all = (
    table.drop(columns=['AMI_range', 'can_buy'])
         .groupby(['GEO_ID'])
         .sum(numeric_only=True)
         .reset_index()
)
buy_all = make_est(buy_all)
buy_all = buy_all.drop(columns=repwts)

In [27]:
# Preview the first three PUMAs of the PUMA-level conventional-loan totals.
buy_all.head(3)

Unnamed: 0,GEO_ID,hh,aff_0400112_can,aff_0400112_cant,aff_0400112_can_wt,aff_0400112_cant_wt,aff_0400113_can,aff_0400113_cant,aff_0400113_can_wt,aff_0400113_cant_wt,...,aff_0400128_cant,aff_0400128_can_wt,aff_0400128_cant_wt,aff_0400129_can,aff_0400129_cant,aff_0400129_can_wt,aff_0400129_cant_wt,hh_SE,hh_MOE,hh_CV
0,400112,57425.0,200,431,18362.0,39063.0,284,347,24955.0,32470.0,...,247,35600.0,21825.0,293,338,26056.0,31369.0,1356.414391,2231.301673,1.435904
1,400113,42824.0,121,375,9343.0,33481.0,183,313,14687.0,28137.0,...,233,21220.0,21604.0,188,308,15108.0,27716.0,1251.366213,2058.497421,1.776361
2,400114,46724.0,46,438,3970.0,42754.0,101,383,9282.0,37442.0,...,315,15721.0,31003.0,112,372,10372.0,36352.0,1310.525143,2155.81386,1.705059


In [28]:
# Working table for the FHA tabulations: a copy of df_fha without the columns
# listed in drop_cols.
fha = df_fha.copy()
fha = fha.drop(columns=drop_cols)

In [29]:
# Weighted household counts per PUMA x AMI band x FHA affordability flag.
# numeric_only=True restricts the sum to the numeric weight columns. Without
# it, whether the leftover string column (can_buy) is dropped or concatenated
# row by row depends on the pandas version.
fha_within = (
    fha.drop(columns=own_cols_fha)
       .groupby(['GEO_ID', 'AMI_range', 'can_buy_fha'])
       .sum(numeric_only=True)
       .reset_index()
)
fha_within = make_est(fha_within)
fha_within = fha_within.drop(columns=repwts)

# One row per PUMA and one column per (measure, AMI band, flag). Each
# combination occurs once after the groupby, so 'sum' only reshapes.
# The string 'sum' replaces np.sum, which is deprecated as an aggfunc.
fha_within = pd.pivot_table(fha_within, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                            columns=['AMI_range', 'can_buy_fha'], aggfunc='sum').reset_index()

In [30]:
# Preview the first three PUMAs of the pivoted FHA table.
fha_within.head(3)

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh,hh,hh,hh,...,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
AMI_range,Unnamed: 1_level_1,100_120_ami,100_120_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o120_ami,o120_ami,...,100_120_ami,100_120_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o120_ami,o120_ami,u30_ami
can_buy_fha,Unnamed: 1_level_2,can buy fha,cannot afford fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,...,can buy fha,cannot afford fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,cannot afford fha
0,400112,,2300.0,3262.0,,4626.0,,3816.0,15722.0,25117.0,...,,557.330576,782.53486,,1039.722507,,729.046806,1471.14841,1670.61186,595.577697
1,400113,,3764.0,4174.0,,4887.0,,3302.0,13044.0,11202.0,...,,747.834713,864.231714,,937.429486,,756.056478,1261.018981,1290.305483,795.313312
2,400114,,4214.0,4863.0,,8128.0,,4446.0,14429.0,5588.0,...,,909.957849,1021.116078,,1276.757109,,776.523378,1400.981998,1005.759545,1060.600658


In [31]:
# PUMA-level totals for the FHA scenario: household weight plus the
# per-threshold affordability counts (aff_*_fha_* columns), with SE / MOE / CV
# for the household total.
# numeric_only=True keeps the leftover string flag column (can_buy) out of the
# sum; otherwise some pandas versions concatenate it into one long string per
# PUMA and carry it into the exported sheet.
fha_all = (
    fha.drop(columns=['AMI_range', 'can_buy_fha'])
       .groupby(['GEO_ID'])
       .sum(numeric_only=True)
       .reset_index()
)
fha_all = make_est(fha_all)
fha_all = fha_all.drop(columns=repwts)

In [32]:
# Preview the first three PUMAs of the PUMA-level FHA totals.
fha_all.head(3)

Unnamed: 0,GEO_ID,hh,aff_0400112_fha_can,aff_0400112_fha_cant,aff_0400112_fha_can_wt,aff_0400112_fha_cant_wt,aff_0400113_fha_can,aff_0400113_fha_cant,aff_0400113_fha_can_wt,aff_0400113_fha_cant_wt,...,aff_0400128_fha_cant,aff_0400128_fha_can_wt,aff_0400128_fha_cant_wt,aff_0400129_fha_can,aff_0400129_fha_cant,aff_0400129_fha_can_wt,aff_0400129_fha_cant_wt,hh_SE,hh_MOE,hh_CV
0,400112,57425.0,175,456,15722.0,41703.0,254,377,23024.0,34401.0,...,276,33224.0,24201.0,267,364,23921.0,33504.0,1356.414391,2231.301673,1.435904
1,400113,42824.0,106,390,7872.0,34952.0,164,332,13044.0,29780.0,...,247,20092.0,22732.0,169,327,13710.0,29114.0,1251.366213,2058.497421,1.776361
2,400114,46724.0,33,451,2545.0,44179.0,76,408,6636.0,40088.0,...,331,14392.0,32332.0,85,399,7418.0,39306.0,1310.525143,2155.81386,1.705059


In [33]:
# Write the four result tables to one workbook, one sheet per table.
out_path = '../../output/pums_gap_own/pums_own_gap_marketdata_120ami_noInf.xlsx'

# Create the output folder if it is missing, so a fresh checkout can run the
# notebook end to end instead of failing at the last cell.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Sheet names keep their '.xlsx' suffix on purpose: anything that reads this
# workbook by sheet name continues to work.
sheets = {
    'can_cannot_withinpuma.xlsx': buy_within,
    'can_cannot_atpuma.xlsx': buy_all,
    'can_cannot_withinpuma_fha.xlsx': fha_within,
    'can_cannot_atpuma_fha.xlsx': fha_all,
}
with pd.ExcelWriter(out_path) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)