# PUMS Household Income vs. AMI (2021) in Phoenix  PUMAS

- https://www.census.gov/data/developers/data-sets/

For households by income and household size to compare to HUD AMI in same year
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
#Search parameters
y1 = '2021'

sample = 'acs1'

phx_pumas = ['0400113','0400114','0400115','0400116','0400117',\
             '0400118','0400119','0400120','0400121','0400122','0400123',\
             '0400125','0400128','0400112','0400129']

#north_pumas = ['0400112','0400129']

data_cols = 'SERIALNO,ST,PUMA,HINCP,NP,WGTP,ADJINC'

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

HINCP - HHI in past 12 months (not -60000:n/a, 0:no income, -59999 loss of 59k+)
-1 to -59998 loss, 1+
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

SMOCP - selected owner costs (monthly) (not '00000': none / '-1')
https://api.census.gov/data/2021/acs/acs1/pums/variables/SMOCP.json

In [4]:
#Income to afford median household sales price by PUMA
inc_needed = {'0400112':178306,'0400113':132721,'0400114':89658,'0400115':83532,\
              '0400116':88144,'0400117':116721,'0400118':83279,'0400119':79856,\
              '0400120':110559,'0400121':85225,'0400122':61405,'0400123':68829,\
              '0400125':73694,'0400128':90883,'0400129':127279}

In [5]:
#AMI bands in 2021 by household size
AMI_30pct = {'1':16600,'2':19000,'3':21960,'4':26500,'5':31040,'6':35580,\
            '7':40120,'8':44660}
AMI_50pct = {'1':27650,'2':31600,'3':35500,'4':39500,'5':42700,'6':45850,\
            '7':49000,'8':52150}
AMI_80pct = {'1':44250,'2':50600,'3':56900,'4':63200,'5':68300,'6':73350,\
            '7':78400,'8':83450}
AMI_100pct = {'1':55300,'2':63200,'3':71100,'4':79000,'5':85400,'6':91700,\
            '7':98000,'8':104300}

In [6]:
#Housing costs affordable to different AMI bands based on household size
unit_afford = {'1':[0,461,767,1217,1535,1000000000],\
              '2':[0,527,877,1392,1535,1000000000],\
              '3':[0,609,985,1565,1973,1000000000],\
              '4':[0,735,1096,1738,2192,1000000000],\
              '5':[0,861,1185,1878,2370,1000000000],\
              '6':[0,987,1272,2017,2545,1000000000],\
              '7':[0,1113,1360,2156,2720,1000000000],\
              '8':[0,1239,1447,2295,2894,1000000000]}

inc_lbls = ['u30_ami','30_50_ami','50_80_ami','80_100_ami','o100_ami']

In [7]:
# create a list of replicate weights
repwt = 'WGTP'
repwts = [repwt+str(i) for i in range(1, 81)]

## Get PUMA data

In [8]:
df = get.get_puma(sample,y1,data_cols)

In [9]:
df['GEO_ID'] = df['ST']+df['PUMA']
df = df[df.GEO_ID.isin(phx_pumas)]
df  = df.drop(['SERIALNO','ST','PUMA'],axis=1)
df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
for col in df.columns[1:]: df[col] = df[col].astype(float)

In [10]:
df['HHSz'] = pd.cut(df['NP'],bins=[0,1,2,3,4,5,6,7,14],\
                   labels=['1','2','3','4','5','6','7','8'])
df['HHSz'] = df['HHSz'].astype(str)
df['HINCP'] = df.ADJINC * df.HINCP

In [11]:
df['inc_needed'] = df['GEO_ID'].map(inc_needed)

In [12]:
dff = df[~(df.HHSz.isna())&(df.HINCP!=-60000)&(df.HINCP!=0)].copy()

dff['aff_cost'] = np.where(dff.HINCP>=1,(dff.HINCP*0.333)/12,0)

In [13]:
dff['can_buy'] = np.where(dff.HINCP>=dff.inc_needed,'can buy','cannot afford')

In [14]:
#assign AMI range based on household size and ami dictionaries
dff['AMI_range'] = np.where((dff['HINCP']<=dff['HHSz'].map(AMI_30pct)),'u30_ami',\
                   np.where((dff['HINCP']>dff['HHSz'].map(AMI_30pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_50pct)),'30_50_ami',\
                   np.where((dff['HINCP']>dff['HHSz'].map(AMI_50pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_80pct)),\
                   '50_80_ami',np.where((dff['HINCP']>dff['HHSz'].map(AMI_80pct))&(dff['HINCP']<=dff['HHSz'].map(AMI_100pct)),\
                   '80_100_ami','o100_ami'))))

In [15]:
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,WGTP75,WGTP76,WGTP77,WGTP78,WGTP79,WGTP80,HHSz,inc_needed,can_buy,AMI_range
4,400119,-61795.68,1.0,0.0,1.029928,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,79856,cannot afford,u30_ami
33,400129,-61795.68,1.0,0.0,1.029928,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,127279,cannot afford,u30_ami
38,400120,-61795.68,1.0,0.0,1.029928,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1,110559,cannot afford,u30_ami


### table by PUMA for renters by AMI range - cost burdened vs. not cost burdened

In [16]:
def make_est(df):
    df['hh_SE'] = df.apply(lambda x: (calc.get_se(x['WGTP'],x[repwts])),axis=1)
    df['hh_MOE'] = df.apply(lambda x: (calc.get_moe(x['hh_SE'])),axis=1)
    df['hh_CV'] = df.apply(lambda x: (calc.get_cv(x['WGTP'],x['hh_SE'])),axis=1)
    df.rename(columns={'WGTP':'hh'},inplace=True)
    return df

In [17]:
drop_cols = ['HINCP','NP','ADJINC','HHSz','inc_needed']

In [18]:
table = dff.copy().drop(columns=drop_cols)

In [19]:
table = table.groupby(['GEO_ID','AMI_range','can_buy']).sum().reset_index()

In [20]:
table_2 = table.copy()
table_2 = make_est(table_2)
table_2 = table_2.drop(columns=repwts)

In [21]:
table_2 = pd.pivot_table(table_2,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',\
                          columns=['AMI_range','can_buy'],aggfunc=np.sum).reset_index()

In [22]:
table_2

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh,hh,hh,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
AMI_range,Unnamed: 1_level_1,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami,30_50_ami,...,o100_ami,u30_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami
can_buy,Unnamed: 1_level_2,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,cannot afford,cannot afford,...,cannot afford,cannot afford,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,cannot afford
0,400112,3262.0,,4626.0,,3816.0,18228.0,33044.0,2582.0,8.865188,...,2.227453,8.524136,782.53486,,1039.722507,,729.046806,1633.989087,1991.741433,595.577697
1,400113,4174.0,,4887.0,,3302.0,14510.0,18279.0,2451.0,7.651489,...,3.641996,11.991214,864.231714,,937.429486,,756.056478,1383.342135,1801.456373,795.313312
2,400114,4863.0,,8128.0,,4446.0,15721.0,11437.0,5056.0,7.759595,...,4.93078,7.751987,1021.116078,,1276.757109,,776.523378,1535.298866,1526.017501,1060.600658
3,400115,6871.0,,9951.0,251.0,4461.0,14035.0,6139.0,4928.0,6.708512,...,6.589258,7.406292,1247.320206,,1591.406532,249.649955,916.749327,1490.808058,1094.626528,987.650554
4,400116,5944.0,,9313.0,,5234.0,15617.0,13160.0,5710.0,6.625095,...,3.837577,6.071623,1065.620919,,1278.497185,,766.198417,1552.473663,1366.610519,938.15087
5,400117,3105.0,,8590.0,,4663.0,14037.0,18948.0,3940.0,9.373495,...,3.468393,7.586193,787.580493,,1318.943289,,1002.48637,1444.81964,1778.37577,808.820087
6,400118,6617.0,,7800.0,,6683.0,13365.0,12500.0,8048.0,6.709658,...,4.209712,5.922474,1201.41573,,1231.479868,,1011.40103,1341.249819,1423.948169,1289.801769
7,400119,3573.0,,6785.0,183.0,2810.0,13491.0,4404.0,5407.0,9.218694,...,6.668487,5.763985,891.321205,,970.206755,159.94231,792.629342,1489.396731,794.705825,843.356142
8,400120,1859.0,,5243.0,,2826.0,17708.0,13878.0,3481.0,11.13859,...,3.999594,9.943878,560.32682,,1105.334214,,774.658429,1417.973493,1502.016229,936.680753
9,400121,4550.0,,9585.0,620.0,5077.0,15128.0,4552.0,4670.0,7.958491,...,7.227814,7.551688,979.882329,,1511.168844,403.155907,1156.110079,1410.183482,890.309539,954.317158


In [23]:
table_2.to_excel('output/pums_own_gap_marketdata.xlsx')