# PUMS Household Income vs. AMI (2021) in Phoenix PUMAs

- https://www.census.gov/data/developers/data-sets/

Households by income and household size, for comparison with the HUD AMI limits for the same year
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
# Search parameters for the Census PUMS request
y1 = '2021'      # survey year
sample = 'acs1'  # ACS sample identifier used in the API path

# GEO_IDs (state code + PUMA code) of the PUMAs kept for the Phoenix analysis
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122', '0400123',
    '0400125', '0400128', '0400112', '0400129',
]

#north_pumas = ['0400112','0400129']

# PUMS variables to request, as one comma-separated string
data_cols = ','.join(['SERIALNO', 'ST', 'PUMA', 'HINCP', 'NP', 'WGTP', 'ADJINC'])

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

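HINCP - household income (HHI) in the past 12 months. Codes: -60000 = N/A, 0 = no income, -59999 = loss of $59,999 or more,
-59998 to -1 = loss, 1+ = income. Records coded -60000 (N/A) or 0 (no income) are excluded below.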
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

SMOCP - selected owner costs (monthly) (not '00000': none / '-1')
https://api.census.gov/data/2021/acs/acs1/pums/variables/SMOCP.json

In [4]:
# Income needed to afford the median sales price in each PUMA with a
# conventional loan, keyed by GEO_ID (state code + PUMA code).
inc_needed = {
    '0400112': 176800, '0400113': 131700, '0400114': 89000,
    '0400115': 82900,  '0400116': 87500,  '0400117': 115800,
    '0400118': 82700,  '0400119': 79300,  '0400120': 109700,
    '0400121': 84600,  '0400122': 61000,  '0400123': 68300,
    '0400125': 73200,  '0400128': 90200,  '0400129': 126300,
}

In [5]:
# Income needed to afford the median sales price in each PUMA under the FHA
# scenario (these thresholds feed the *_fha columns), keyed by GEO_ID.
inc_needed_fha = {
    '0400112': 195470, '0400113': 145650, '0400114': 98410,
    '0400115': 91670,  '0400116': 96710,  '0400117': 128070,
    '0400118': 91400,  '0400119': 87630,  '0400120': 121330,
    '0400121': 93560,  '0400122': 67400,  '0400123': 75490,
    '0400125': 80890,  '0400128': 99760,  '0400129': 139660,
}

In [6]:
# 2021 AMI income limits, keyed by household size ('1'..'8') as a string so the
# dictionaries can be used directly with Series.map on the HHSz column.
# NOTE(review): AMI_50pct['3'] (35500) is not half of AMI_100pct['3'] (71100),
# unlike every other household size - verify against the AMI limits PDF.
AMI_30pct = {
    '1': 16600, '2': 19000, '3': 21960, '4': 26500,
    '5': 31040, '6': 35580, '7': 40120, '8': 44660,
}
AMI_50pct = {
    '1': 27650, '2': 31600, '3': 35500, '4': 39500,
    '5': 42700, '6': 45850, '7': 49000, '8': 52150,
}
AMI_80pct = {
    '1': 44250, '2': 50600, '3': 56900, '4': 63200,
    '5': 68300, '6': 73350, '7': 78400, '8': 83450,
}
AMI_100pct = {
    '1': 55300, '2': 63200, '3': 71100, '4': 79000,
    '5': 85400, '6': 91700, '7': 98000, '8': 104300,
}

In [7]:
# Housing-cost cutoffs affordable to each AMI band, by household size.
# Each list has six values for the five labels in inc_lbls, so these are
# presumably bin edges (the last value acts as an open-ended upper limit).
# NOTE(review): the '2' row repeats the '1' row's fourth cutoff (1535), which
# breaks the pattern of the other rows - verify against the source table.
unit_afford = {
    '1': [0, 461, 767, 1217, 1535, 1_000_000_000],
    '2': [0, 527, 877, 1392, 1535, 1_000_000_000],
    '3': [0, 609, 985, 1565, 1973, 1_000_000_000],
    '4': [0, 735, 1096, 1738, 2192, 1_000_000_000],
    '5': [0, 861, 1185, 1878, 2370, 1_000_000_000],
    '6': [0, 987, 1272, 2017, 2545, 1_000_000_000],
    '7': [0, 1113, 1360, 2156, 2720, 1_000_000_000],
    '8': [0, 1239, 1447, 2295, 2894, 1_000_000_000],
}

# Labels for the five AMI bands, lowest to highest
inc_lbls = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami', 'o100_ami']

In [8]:
# Column names of the 80 household replicate weights: WGTP1 ... WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{n}' for n in range(1, 81)]

## Get PUMA data

In [9]:
# Fetch the PUMS household records for the chosen sample and year through the
# project helper module (get_pums).
# NOTE(review): the frames displayed further down also contain WGTP1..WGTP80,
# so the helper presumably adds the replicate weights to data_cols - confirm.
df = get.get_puma(sample,y1,data_cols)

In [10]:
# Build the PUMA identifier (state code + PUMA code), keep only the Phoenix
# PUMAs and drop the identifier parts that are no longer needed.
df = df.assign(GEO_ID=df['ST'] + df['PUMA'])
df = df.loc[df['GEO_ID'].isin(phx_pumas)].drop(columns=['SERIALNO', 'ST', 'PUMA'])

# Move the identifier to the first column and convert every other column to float.
value_cols = [name for name in df.columns if name != 'GEO_ID']
df = df[['GEO_ID'] + value_cols]
df = df.astype({name: float for name in value_cols})

In [11]:
# Bucket household size (NP) into the '1'..'8' categories used as keys of the
# AMI dictionaries. The top bin is open-ended so every household with 8 or
# more persons lands in '8'; an upper edge of 14 would leave larger households
# without a category.
hh_size = pd.cut(df['NP'], bins=[0, 1, 2, 3, 4, 5, 6, 7, np.inf],
                 labels=['1', '2', '3', '4', '5', '6', '7', '8'])
# Store the labels as str for the dictionary lookups, but keep rows that fall
# outside the bins (NP <= 0 or missing) as real NaN: a plain astype(str) turns
# them into the string 'nan', which an isna() filter cannot detect.
df['HHSz'] = hh_size.astype(str).where(hh_size.notna())

In [12]:
# Attach the income thresholds of each household's own PUMA
# (conventional and FHA scenarios).
df = df.assign(
    inc_needed=df['GEO_ID'].map(inc_needed),
    inc_needed_fha=df['GEO_ID'].map(inc_needed_fha),
)

In [13]:
# Keep households that have a size category and a reportable, non-zero income.
# HHSz is built with astype(str), so a missing category can appear as the
# literal string 'nan' instead of NaN; exclude both forms (isna() alone never
# matches the string).
has_size = df['HHSz'].notna() & (df['HHSz'] != 'nan')
# HINCP codes: -60000 = not applicable, 0 = no income
has_income = (df['HINCP'] != -60000) & (df['HINCP'] != 0)
dff = df[has_size & has_income].copy()

# Apply the ADJINC adjustment factor to the reported household income.
dff['HINCP'] = dff['ADJINC'] * dff['HINCP']

# Affordable monthly housing cost = 30% of annual income / 12
# (set to 0 when adjusted income is below 1, i.e. losses).
dff['aff_cost'] = np.where(dff['HINCP'] >= 1, (dff['HINCP'] * 0.3) / 12, 0)

In [14]:
# Label each household by whether its adjusted income reaches the threshold of
# its own PUMA, under conventional and FHA terms.
meets_conventional = dff['HINCP'].ge(dff['inc_needed'])
meets_fha = dff['HINCP'].ge(dff['inc_needed_fha'])
dff['can_buy'] = np.where(meets_conventional, 'can buy', 'cannot afford')
dff['can_buy_fha'] = np.where(meets_fha, 'can buy fha', 'cannot afford fha')

In [15]:
# Assign each household to an AMI band, using the income limits that apply to
# its household size (looked up once per limit table).
income = dff['HINCP']
limit_30, limit_50, limit_80, limit_100 = (
    dff['HHSz'].map(limits)
    for limits in (AMI_30pct, AMI_50pct, AMI_80pct, AMI_100pct)
)
band_conditions = [
    income <= limit_30,
    (income > limit_30) & (income <= limit_50),
    (income > limit_50) & (income <= limit_80),
    (income > limit_80) & (income <= limit_100),
]
band_labels = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami']
# Rows matching none of the conditions (income above the 100% limit, or no
# limit found for the household size) fall through to 'o100_ami'.
dff['AMI_range'] = np.select(band_conditions, band_labels, default='o100_ami')

In [16]:
# Independent copy of the household frame that will receive the FHA-threshold
# flag columns, keeping them separate from the conventional-loan columns on dff.
df_fha = dff.copy()

In [17]:
# For every PUMA threshold, flag each household (whatever PUMA it lives in) as
# able / unable to afford that PUMA's median sales price on conventional terms.
# The *_wt columns hold the same flags multiplied by the household weight WGTP.
own_cols = []
for puma, threshold in inc_needed.items():
    prefix = f'aff_{puma}'
    dff[f'{prefix}_can'] = np.where(dff.HINCP >= threshold, 1, 0)
    dff[f'{prefix}_cant'] = np.where(dff.HINCP < threshold, 1, 0)
    dff[f'{prefix}_can_wt'] = dff[f'{prefix}_can'] * dff.WGTP
    dff[f'{prefix}_cant_wt'] = dff[f'{prefix}_cant'] * dff.WGTP
    own_cols.extend(f'{prefix}_{suffix}' for suffix in ('can', 'cant', 'can_wt', 'cant_wt'))

In [18]:
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,aff_0400125_can_wt,aff_0400125_cant_wt,aff_0400128_can,aff_0400128_cant,aff_0400128_can_wt,aff_0400128_cant_wt,aff_0400129_can,aff_0400129_cant,aff_0400129_can_wt,aff_0400129_cant_wt
3601,400122,297649.192,2.0,72.0,1.029928,69.0,19.0,68.0,74.0,21.0,...,72.0,0.0,1,0,72.0,0.0,1,0,72.0,0.0
3602,400112,76214.672,2.0,55.0,1.029928,56.0,96.0,91.0,53.0,16.0,...,55.0,0.0,0,1,0.0,55.0,0,1,0.0,55.0
3606,400119,40476.1704,3.0,50.0,1.029928,51.0,87.0,16.0,92.0,87.0,...,0.0,50.0,0,1,0.0,50.0,0,1,0.0,50.0


In [19]:
# For every PUMA threshold, flag each household (whatever PUMA it lives in) as
# able / unable to afford that PUMA's median sales price under the FHA scenario.
# The *_wt columns hold the same flags multiplied by the household weight WGTP.
own_cols_fha = []
for puma, threshold in inc_needed_fha.items():
    prefix = f'aff_{puma}_fha'
    df_fha[f'{prefix}_can'] = np.where(df_fha.HINCP >= threshold, 1, 0)
    df_fha[f'{prefix}_cant'] = np.where(df_fha.HINCP < threshold, 1, 0)
    df_fha[f'{prefix}_can_wt'] = df_fha[f'{prefix}_can'] * df_fha.WGTP
    df_fha[f'{prefix}_cant_wt'] = df_fha[f'{prefix}_cant'] * df_fha.WGTP
    own_cols_fha.extend(f'{prefix}_{suffix}' for suffix in ('can', 'cant', 'can_wt', 'cant_wt'))

In [20]:
df_fha.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,...,aff_0400125_fha_can_wt,aff_0400125_fha_cant_wt,aff_0400128_fha_can,aff_0400128_fha_cant,aff_0400128_fha_can_wt,aff_0400128_fha_cant_wt,aff_0400129_fha_can,aff_0400129_fha_cant,aff_0400129_fha_can_wt,aff_0400129_fha_cant_wt
3601,400122,297649.192,2.0,72.0,1.029928,69.0,19.0,68.0,74.0,21.0,...,72.0,0.0,1,0,72.0,0.0,1,0,72.0,0.0
3602,400112,76214.672,2.0,55.0,1.029928,56.0,96.0,91.0,53.0,16.0,...,0.0,55.0,0,1,0.0,55.0,0,1,0.0,55.0
3606,400119,40476.1704,3.0,50.0,1.029928,51.0,87.0,16.0,92.0,87.0,...,0.0,50.0,0,1,0.0,50.0,0,1,0.0,50.0


### Tables by PUMA: households by AMI range that can vs. cannot afford the median sales price (conventional and FHA loan terms)

In [21]:
def make_est(df):
    """Add standard error, margin of error and CV columns for the household estimate.

    Expects one row per estimate, with the full-sample weight total in ``WGTP``
    and the replicate-weight totals in the columns listed in the module-level
    ``repwts``. For each row the project helpers in ``calc`` are called:
    ``get_se(WGTP, replicate values)`` -> ``hh_SE``, ``get_moe(hh_SE)`` ->
    ``hh_MOE`` and ``get_cv(WGTP, hh_SE)`` -> ``hh_CV``.
    NOTE(review): the formulas themselves live in the ``pums`` module and are
    not visible here.

    The frame is modified in place (three columns added, ``WGTP`` renamed to
    ``hh``) and the same object is returned.
    """
    def _row_se(row):
        return calc.get_se(row['WGTP'], row[repwts])

    def _row_moe(row):
        return calc.get_moe(row['hh_SE'])

    def _row_cv(row):
        return calc.get_cv(row['WGTP'], row['hh_SE'])

    # Order matters: MOE and CV both read the hh_SE column created first.
    df['hh_SE'] = df.apply(_row_se, axis=1)
    df['hh_MOE'] = df.apply(_row_moe, axis=1)
    df['hh_CV'] = df.apply(_row_cv, axis=1)
    df.rename(columns={'WGTP': 'hh'}, inplace=True)
    return df

In [22]:
# Household-level input columns removed before aggregation, so that they are
# not summed into the grouped tables built below.
drop_cols = ['HINCP','NP','ADJINC','HHSz','inc_needed','inc_needed_fha']

In [23]:
# Working frame for the conventional-loan tables (drop() already returns a new frame).
table = dff.drop(columns=drop_cols)

In [24]:
# Households by PUMA x AMI band x can / cannot buy (conventional terms).
# numeric_only=True keeps the leftover text label column (can_buy_fha) out of
# the sum; only the weight and replicate-weight totals are needed here.
table_2 = (
    table.drop(columns=own_cols)
         .groupby(['GEO_ID', 'AMI_range', 'can_buy'])
         .sum(numeric_only=True)
         .reset_index()
)
# Add SE / MOE / CV from the replicate weights, then discard the replicates.
table_2 = make_est(table_2)
table_2 = table_2.drop(columns=repwts)
# One row per PUMA, one column per (measure, AMI band, can_buy) combination.
# The string 'sum' replaces the np.sum callable, which pandas deprecates here.
table_2 = pd.pivot_table(table_2, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                         columns=['AMI_range', 'can_buy'], aggfunc='sum').reset_index()

In [25]:
table_2

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh,hh,hh,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
AMI_range,Unnamed: 1_level_1,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami,30_50_ami,...,o100_ami,u30_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami
can_buy,Unnamed: 1_level_2,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,cannot afford,cannot afford,...,cannot afford,cannot afford,cannot afford,can buy,cannot afford,can buy,cannot afford,can buy,cannot afford,cannot afford
0,400112,3262.0,,4626.0,,3816.0,18362.0,24777.0,2582.0,8.865188,...,2.55573,8.524136,782.53486,,1039.722507,,729.046806,1677.335283,1713.544632,595.577697
1,400113,4174.0,,4887.0,,3302.0,14687.0,13323.0,2451.0,7.651489,...,3.941739,11.991214,864.231714,,937.429486,,756.056478,1445.396149,1421.090546,795.313312
2,400114,4863.0,,8128.0,,4446.0,16102.0,8129.0,5056.0,7.759595,...,5.750566,7.751987,1021.116078,,1276.757109,,776.523378,1542.505873,1264.967859,1060.600658
3,400115,6871.0,,9951.0,251.0,4461.0,14080.0,3311.0,4928.0,6.708512,...,8.701682,7.406292,1247.320206,,1591.406532,249.649955,916.749327,1501.556054,779.64011,987.650554
4,400116,5944.0,,9313.0,,5234.0,15849.0,8220.0,5710.0,6.625095,...,5.260362,6.071623,1065.620919,,1278.497185,,766.198417,1566.261506,1170.089928,938.15087
5,400117,3105.0,,8590.0,,4663.0,14440.0,12629.0,3940.0,9.373495,...,4.08701,7.586193,787.580493,,1318.943289,,1002.48637,1472.02669,1396.710884,808.820087
6,400118,6617.0,,7800.0,,6683.0,13365.0,5772.0,8048.0,6.709658,...,5.918391,5.922474,1201.41573,,1231.479868,,1011.40103,1341.249819,924.403868,1289.801769
7,400119,3573.0,,6785.0,321.0,2672.0,13691.0,2154.0,5407.0,9.218694,...,10.381007,5.763985,891.321205,,970.206755,207.81289,789.079683,1444.966937,605.085864,843.356142
8,400120,1859.0,,5243.0,,2826.0,17708.0,11313.0,3481.0,11.13859,...,4.472546,9.943878,560.32682,,1105.334214,,774.658429,1417.973493,1369.192209,936.680753
9,400121,4550.0,,9585.0,620.0,5077.0,15229.0,3028.0,4670.0,7.958491,...,9.202038,7.551688,979.882329,,1511.168844,403.155907,1156.110079,1394.991183,754.000649,954.317158


In [26]:
# PUMA-level totals: household weight, replicate weights and the per-threshold
# can / cannot flags (raw counts and weighted).
# numeric_only=True excludes the remaining text label column (can_buy_fha),
# which would otherwise be "summed" as text or rejected, depending on the
# pandas version, and could end up in the exported sheet.
table_3 = (
    table.drop(columns=['AMI_range', 'can_buy'])
         .groupby(['GEO_ID'])
         .sum(numeric_only=True)
         .reset_index()
)
# Add SE / MOE / CV for the total household estimate, then discard the replicates.
table_3 = make_est(table_3)
table_3 = table_3.drop(columns=repwts)

In [32]:
table_3

Unnamed: 0,GEO_ID,hh,aff_0400112_can,aff_0400112_cant,aff_0400112_can_wt,aff_0400112_cant_wt,aff_0400113_can,aff_0400113_cant,aff_0400113_can_wt,aff_0400113_cant_wt,...,aff_0400128_cant,aff_0400128_can_wt,aff_0400128_cant_wt,aff_0400129_can,aff_0400129_cant,aff_0400129_can_wt,aff_0400129_cant_wt,hh_SE,hh_MOE,hh_CV
0,400112,57425.0,200,431,18362.0,39063.0,284,347,24955.0,32470.0,...,247,35600.0,21825.0,293,338,26056.0,31369.0,1356.414391,2231.301673,1.435904
1,400113,42824.0,121,375,9343.0,33481.0,183,313,14687.0,28137.0,...,233,21220.0,21604.0,188,308,15108.0,27716.0,1251.366213,2058.497421,1.776361
2,400114,46724.0,46,438,3970.0,42754.0,101,383,9282.0,37442.0,...,315,15721.0,31003.0,112,372,10372.0,36352.0,1310.525143,2155.81386,1.705059
3,400115,43853.0,32,362,3587.0,40266.0,50,344,5694.0,38159.0,...,269,13077.0,30776.0,60,334,6528.0,37325.0,1561.368302,2568.450857,2.164413
4,400116,50270.0,66,487,5302.0,44968.0,111,442,8280.0,41990.0,...,359,15299.0,34971.0,117,436,8807.0,41463.0,1216.228371,2000.69567,1.470755
5,400117,47367.0,101,356,8070.0,39297.0,144,313,12343.0,35024.0,...,249,19230.0,28137.0,147,310,12554.0,34813.0,1233.680986,2029.405222,1.583292
6,400118,48285.0,35,482,2713.0,45572.0,68,449,5577.0,42708.0,...,387,11838.0,36447.0,72,445,6194.0,42091.0,1418.724885,2333.802436,1.786159
7,400119,34603.0,18,297,2040.0,32563.0,47,268,4863.0,29740.0,...,213,11491.0,23112.0,50,265,5285.0,29318.0,1085.465292,1785.590405,1.906937
8,400120,42430.0,116,333,10077.0,32353.0,167,282,14570.0,27860.0,...,200,22689.0,19741.0,178,271,15642.0,26788.0,1286.138144,2115.697247,1.842675
9,400121,42759.0,29,337,2923.0,39836.0,65,301,6081.0,36678.0,...,223,13933.0,28826.0,71,295,6573.0,36186.0,1193.361743,1963.080067,1.696597


In [27]:
# Working frame for the FHA tables (drop() already returns a new frame).
fha = df_fha.drop(columns=drop_cols)

In [28]:
# Households by PUMA x AMI band x can / cannot buy (FHA scenario).
# numeric_only=True keeps the leftover text label column (can_buy) out of the
# sum; only the weight and replicate-weight totals are needed here.
fha_2 = (
    fha.drop(columns=own_cols_fha)
       .groupby(['GEO_ID', 'AMI_range', 'can_buy_fha'])
       .sum(numeric_only=True)
       .reset_index()
)
# Add SE / MOE / CV from the replicate weights, then discard the replicates.
fha_2 = make_est(fha_2)
fha_2 = fha_2.drop(columns=repwts)
# One row per PUMA, one column per (measure, AMI band, can_buy_fha) combination.
# The string 'sum' replaces the np.sum callable, which pandas deprecates here.
fha_2 = pd.pivot_table(fha_2, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                       columns=['AMI_range', 'can_buy_fha'], aggfunc='sum').reset_index()

In [29]:
fha_2

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh,hh,hh,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
AMI_range,Unnamed: 1_level_1,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami,30_50_ami,...,o100_ami,u30_ami,30_50_ami,50_80_ami,50_80_ami,80_100_ami,80_100_ami,o100_ami,o100_ami,u30_ami
can_buy_fha,Unnamed: 1_level_2,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,cannot afford fha,cannot afford fha,...,cannot afford fha,cannot afford fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,can buy fha,cannot afford fha,cannot afford fha
0,400112,3262.0,,4626.0,,3816.0,15722.0,27417.0,2582.0,8.865188,...,2.402332,8.524136,782.53486,,1039.722507,,729.046806,1471.14841,1782.316147,595.577697
1,400113,4174.0,,4887.0,,3302.0,13044.0,14966.0,2451.0,7.651489,...,3.524238,11.991214,864.231714,,937.429486,,756.056478,1261.018981,1427.25887,795.313312
2,400114,4863.0,,8128.0,,4446.0,14429.0,9802.0,5056.0,7.759595,...,4.779416,7.751987,1021.116078,,1276.757109,,776.523378,1400.981998,1267.714156,1060.600658
3,400115,6871.0,,9951.0,,4712.0,12830.0,4561.0,4928.0,6.708512,...,7.485673,7.406292,1247.320206,,1591.406532,,885.502064,1475.504893,923.895252,987.650554
4,400116,5944.0,,9313.0,,5234.0,13417.0,10652.0,5710.0,6.625095,...,4.210651,6.071623,1065.620919,,1278.497185,,766.198417,1403.782969,1213.702527,938.15087
5,400117,3105.0,,8590.0,,4663.0,12554.0,14515.0,3940.0,9.373495,...,4.187234,7.586193,787.580493,,1318.943289,,1002.48637,1375.375612,1644.659902,808.820087
6,400118,6617.0,,7800.0,,6683.0,11564.0,7573.0,8048.0,6.709658,...,4.863863,5.922474,1201.41573,,1231.479868,,1011.40103,1229.75512,996.738201,1289.801769
7,400119,3573.0,,6785.0,62.0,2931.0,11755.0,4090.0,5407.0,9.218694,...,7.961949,5.763985,891.321205,,970.206755,72.653805,808.022845,1333.600419,881.200068,843.356142
8,400120,1859.0,,5243.0,,2826.0,16101.0,12920.0,3481.0,11.13859,...,4.227855,9.943878,560.32682,,1105.334214,,774.658429,1356.507087,1478.136076,936.680753
9,400121,4550.0,,9585.0,46.0,5651.0,12982.0,5275.0,4670.0,7.958491,...,6.353728,7.551688,979.882329,,1511.168844,56.089435,1251.008954,1334.342133,906.949048,954.317158


In [30]:
# PUMA-level totals for the FHA scenario: household weight, replicate weights
# and the per-threshold can / cannot flags (raw counts and weighted).
# numeric_only=True excludes the remaining text label column (can_buy), which
# would otherwise be "summed" as text or rejected, depending on the pandas
# version, and could end up in the exported sheet.
fha_3 = (
    fha.drop(columns=['AMI_range', 'can_buy_fha'])
       .groupby(['GEO_ID'])
       .sum(numeric_only=True)
       .reset_index()
)
# Add SE / MOE / CV for the total household estimate, then discard the replicates.
fha_3 = make_est(fha_3)
fha_3 = fha_3.drop(columns=repwts)

In [31]:
fha_3

Unnamed: 0,GEO_ID,hh,aff_0400112_fha_can,aff_0400112_fha_cant,aff_0400112_fha_can_wt,aff_0400112_fha_cant_wt,aff_0400113_fha_can,aff_0400113_fha_cant,aff_0400113_fha_can_wt,aff_0400113_fha_cant_wt,...,aff_0400128_fha_cant,aff_0400128_fha_can_wt,aff_0400128_fha_cant_wt,aff_0400129_fha_can,aff_0400129_fha_cant,aff_0400129_fha_can_wt,aff_0400129_fha_cant_wt,hh_SE,hh_MOE,hh_CV
0,400112,57425.0,175,456,15722.0,41703.0,254,377,23024.0,34401.0,...,276,33224.0,24201.0,267,364,23921.0,33504.0,1356.414391,2231.301673,1.435904
1,400113,42824.0,106,390,7872.0,34952.0,164,332,13044.0,29780.0,...,247,20092.0,22732.0,169,327,13710.0,29114.0,1251.366213,2058.497421,1.776361
2,400114,46724.0,33,451,2545.0,44179.0,76,408,6636.0,40088.0,...,331,14392.0,32332.0,85,399,7418.0,39306.0,1310.525143,2155.81386,1.705059
3,400115,43853.0,26,368,2752.0,41101.0,40,354,4490.0,39363.0,...,289,11152.0,32701.0,43,351,4770.0,39083.0,1561.368302,2568.450857,2.164413
4,400116,50270.0,56,497,4492.0,45778.0,93,460,7247.0,43023.0,...,387,12670.0,37600.0,100,453,7601.0,42669.0,1216.228371,2000.69567,1.470755
5,400117,47367.0,86,371,6764.0,40603.0,126,331,10401.0,36966.0,...,267,17053.0,30314.0,135,322,11660.0,35707.0,1233.680986,2029.405222,1.583292
6,400118,48285.0,27,490,2301.0,45984.0,51,466,3931.0,44354.0,...,401,10523.0,37762.0,57,460,4348.0,43937.0,1418.724885,2333.802436,1.786159
7,400119,34603.0,12,303,1192.0,33411.0,37,278,3565.0,31038.0,...,226,10144.0,24459.0,43,272,4095.0,30508.0,1085.465292,1785.590405,1.906937
8,400120,42430.0,95,354,8393.0,34037.0,154,295,13570.0,28860.0,...,223,20173.0,22257.0,159,290,13885.0,28545.0,1286.138144,2115.697247,1.842675
9,400121,42759.0,21,345,2076.0,40683.0,48,318,4578.0,38181.0,...,247,11424.0,31335.0,55,311,5121.0,37638.0,1193.361743,1963.080067,1.696597


In [34]:
# Write the four summary tables to a single workbook, one sheet per table.
out_path = '../../output/pums_gap_own/pums_own_gap_marketdata_0820.xlsx'
# Create the output folder if it is missing, so a run on a fresh checkout does
# not fail at the very last step.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
with pd.ExcelWriter(out_path) as writer:
    table_2.to_excel(writer, sheet_name='can_cannot_withinpuma.xlsx')
    table_3.to_excel(writer, sheet_name='can_cannot_atpuma.xlsx')
    fha_2.to_excel(writer, sheet_name='can_cannot_withinpuma_fha.xlsx')
    fha_3.to_excel(writer, sheet_name='can_cannot_atpuma_fha.xlsx')