# PUMS Household Income vs. AMI (2021) in Phoenix  PUMAS

- https://www.census.gov/data/developers/data-sets/

For households by income and household size to compare to HUD AMI in same year
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
#Search parameters
y1 = '2021'
#y0 = '2013'

sample = 'acs1'

phx_pumas = ['0400113','0400114','0400115','0400116','0400117',\
             '0400118','0400119','0400120','0400121','0400122','0400123',\
             '0400125','0400128','0400112','0400129']

#north_pumas = ['0400112','0400129']

data_cols = 'SERIALNO,ST,PUMA,HINCP,NP,WGTP,ADJINC,ADJHSG,GRNTP,GASP,FULP,WATP,ELEP'

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

HINCP - HHI in past 12 months (not -60000:n/a, 0:no income, -59999 loss of 59k+)
-1 to -59998 loss, 1+
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

GRNTP - gross rent (monthly) (not 0: n/a,not paying rent, owned)
https://api.census.gov/data/2021/acs/acs1/pums/variables/GRNTP.json

GASP - gas cost (monthly) (not 3:N/a, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/GASP.json

FULP - fuel cost (yearly) (not 2:n/a, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/FULP.json

WATP - water cost (yearly) (not 2:n/a,included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/WATP.json

ELEP - electricity cost (monthly) (2:N/A, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/ELEP.json

In [4]:
#AMI bands in 2021 by household size
AMI_30pct = {'1':16600,'2':19000,'3':21960,'4':26500,'5':31040,'6':35580,\
            '7':40120,'8':44660}
AMI_50pct = {'1':27650,'2':31600,'3':35500,'4':39500,'5':42700,'6':45850,\
            '7':49000,'8':52150}
AMI_80pct = {'1':44250,'2':50600,'3':56900,'4':63200,'5':68300,'6':73350,\
            '7':78400,'8':83450}
AMI_100pct = {'1':55300,'2':63200,'3':71100,'4':79000,'5':85400,'6':91700,\
            '7':98000,'8':104300}

In [5]:
#Housing costs affordable to different AMI bands based on household size
aff_1p = {'30pct':461,'50pct':767,'80pct':1217,'100pct':1535}
aff_2p = {'30pct':527,'50pct':877,'80pct':1392,'100pct':1754}
aff_3p = {'30pct':609,'50pct':985,'80pct':1565,'100pct':1973}
aff_4p = {'30pct':735,'50pct':1096,'80pct':1738,'100pct':2192}
aff_5p = {'30pct':861,'50pct':1185,'80pct':1878,'100pct':2370}
aff_6p = {'30pct':987,'50pct':1272,'80pct':2017,'100pct':2545}
aff_7p = {'30pct':1113,'50pct':1360,'80pct':2156,'100pct':2720}
aff_8p = {'30pct':1239,'50pct':1447,'80pct':2295,'100pct':2894}

unit_afford = {'1':[0,461,767,1217,1535,1000000000],\
              '2':[0,527,877,1392,1535,1000000000],\
              '3':[0,609,985,1565,1973,1000000000],\
              '4':[0,735,1096,1738,2192,1000000000],\
              '5':[0,861,1185,1878,2370,1000000000],\
              '6':[0,987,1272,2017,2545,1000000000],\
              '7':[0,1113,1360,2156,2720,1000000000],\
              '8':[0,1239,1447,2295,2894,1000000000]}

inc_lbls = ['u30_ami','30_50_ami','50_80_ami','80_100_ami','o100_ami']

In [6]:
# create a list of replicate weights
repwt = 'WGTP'
repwts = [repwt+str(i) for i in range(1, 81)]

## Get PUMA data

In [7]:
df = get.get_puma(sample,y1,data_cols)

In [8]:
df['GEO_ID'] = df['ST']+df['PUMA']
df = df[df.GEO_ID.isin(phx_pumas)]
df  = df.drop(['SERIALNO','ST','PUMA'],axis=1)
df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
for col in df.columns[1:]: df[col] = df[col].astype(float)

In [9]:
df['HHSz'] = pd.cut(df['NP'],bins=[0,1,2,3,4,5,6,7,14],\
                   labels=['1','2','3','4','5','6','7','8'])
df['HHSz'] = df['HHSz'].astype(str)
df['HINCP'] = df.ADJINC * df.HINCP

In [10]:
dff = df[(df.GRNTP!=0)].copy()

In [11]:
dff['gas'] = np.where(dff.GASP==3,0,dff.GASP)
dff['fuel'] = np.where(dff.FULP==2,0,(dff.FULP/12))
dff['water'] = np.where(dff.WATP==2,0,(dff.WATP/12))
dff['elec'] = np.where(dff.ELEP==2,0,dff.ELEP)

dff['utilities'] = dff.gas + dff.fuel + dff.water + dff.elec
dff['hou_cost'] = dff.GRNTP+dff.utilities
dff['aff_cost'] = np.where(dff.HINCP>=1,(dff.HINCP*0.333)/12,0)

In [12]:
#who is the unit affordable to based on the rent
dff['unit_aff'] = np.where(dff.HHSz=='1',pd.cut(dff['hou_cost'],bins=unit_afford['1'],labels=inc_lbls),\
                  np.where(dff.HHSz=='2',pd.cut(dff['hou_cost'],bins=unit_afford['2'],labels=inc_lbls),\
                  np.where(dff.HHSz=='3',pd.cut(dff['hou_cost'],bins=unit_afford['3'],labels=inc_lbls),\
                  np.where(dff.HHSz=='4',pd.cut(dff['hou_cost'],bins=unit_afford['4'],labels=inc_lbls),\
                  np.where(dff.HHSz=='5',pd.cut(dff['hou_cost'],bins=unit_afford['5'],labels=inc_lbls),\
                  np.where(dff.HHSz=='6',pd.cut(dff['hou_cost'],bins=unit_afford['6'],labels=inc_lbls),\
                  np.where(dff.HHSz=='7',pd.cut(dff['hou_cost'],bins=unit_afford['7'],labels=inc_lbls),\
                  np.where(dff.HHSz=='8',pd.cut(dff['hou_cost'],bins=unit_afford['8'],labels=inc_lbls),''))))))))

In [13]:
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,ADJHSG,GRNTP,GASP,FULP,WATP,...,WGTP80,HHSz,gas,fuel,water,elec,utilities,hou_cost,aff_cost,unit_aff
4,400114,30897.84,1.0,111.0,1.029928,1000000.0,1283.0,3.0,2.0,1000.0,...,30.0,1,0.0,0.0,83.333333,100.0,183.333333,1466.333333,857.41506,80_100_ami
5,400119,77244.6,2.0,102.0,1.029928,1000000.0,780.0,3.0,2.0,2.0,...,206.0,2,0.0,0.0,0.0,30.0,30.0,810.0,2143.53765,30_50_ami
13,400120,83115.1896,1.0,90.0,1.029928,1000000.0,2650.0,100.0,2.0,1200.0,...,145.0,1,100.0,0.0,100.0,150.0,350.0,3000.0,2306.446511,o100_ami


### table by PUMA for renters by AMI range - cost burdened vs. not cost burdened

In [14]:
def make_est(df):
    df['hh_SE'] = df.apply(lambda x: (calc.get_se(x['WGTP'],x[repwts])),axis=1)
    df['hh_MOE'] = df.apply(lambda x: (calc.get_moe(x['hh_SE'])),axis=1)
    df['hh_CV'] = df.apply(lambda x: (calc.get_cv(x['WGTP'],x['hh_SE'])),axis=1)
    df.rename(columns={'WGTP':'hh'},inplace=True)
    return df

In [15]:
drop_cols = ['HINCP','NP','ADJINC','ADJHSG','GRNTP','GASP','FULP','WATP',\
             'ELEP','gas', 'fuel', 'water', 'elec', 'utilities','hou_cost','HHSz']

In [16]:
table = dff.copy().drop(columns=drop_cols)

In [17]:
table = table.groupby(['GEO_ID','unit_aff']).sum().reset_index()

In [19]:
table.WGTP.sum()

246922.0

In [18]:
table.head()

Unnamed: 0,GEO_ID,unit_aff,WGTP,WGTP1,WGTP2,WGTP3,WGTP4,WGTP5,WGTP6,WGTP7,...,WGTP72,WGTP73,WGTP74,WGTP75,WGTP76,WGTP77,WGTP78,WGTP79,WGTP80,aff_cost
0,400112,50_80_ami,858.0,936.0,1312.0,681.0,842.0,949.0,930.0,1230.0,...,530.0,1033.0,807.0,663.0,829.0,620.0,750.0,789.0,1166.0,8597.015002
1,400112,80_100_ami,1566.0,2273.0,1702.0,1102.0,1072.0,1266.0,2084.0,1126.0,...,1742.0,1767.0,1967.0,1318.0,1651.0,1840.0,1836.0,1511.0,944.0,15627.818494
2,400112,o100_ami,9295.0,8787.0,8842.0,9627.0,9701.0,9406.0,8778.0,9794.0,...,9082.0,9735.0,8872.0,8332.0,10072.0,7293.0,8595.0,9323.0,9551.0,155858.623167
3,400112,u30_ami,118.0,113.0,34.0,122.0,117.0,35.0,201.0,117.0,...,125.0,36.0,200.0,118.0,116.0,116.0,200.0,115.0,35.0,1663.385216
4,400113,30_50_ami,492.0,337.0,391.0,409.0,511.0,652.0,298.0,201.0,...,550.0,668.0,552.0,670.0,224.0,437.0,560.0,691.0,612.0,4358.526555


In [20]:
table_2 = table.copy().groupby(['GEO_ID','unit_aff']).sum().reset_index()
table_2 = make_est(table_2)
table_2 = table_2.drop(columns=repwts)

In [21]:
table_2 = pd.pivot_table(table_2,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',\
                          columns=['unit_aff'],aggfunc=np.sum).reset_index()

In [22]:
table_2

Unnamed: 0_level_0,GEO_ID,hh,hh,hh,hh,hh,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
unit_aff,Unnamed: 1_level_1,30_50_ami,50_80_ami,80_100_ami,o100_ami,u30_ami,30_50_ami,50_80_ami,80_100_ami,o100_ami,u30_ami,30_50_ami,50_80_ami,80_100_ami,o100_ami,u30_ami
0,400112,,858.0,1566.0,9295.0,118.0,,25.544335,18.244677,5.927121,42.787027,,593.080563,773.142825,1490.819266,136.623662
1,400113,492.0,3298.0,2140.0,6215.0,282.0,26.03071,8.816339,12.159288,6.655883,40.708814,346.563574,786.811556,704.131437,1119.382794,310.64857
2,400114,1959.0,6711.0,4897.0,5039.0,307.0,11.395861,6.364517,7.523896,8.438801,20.986113,604.106345,1155.804879,997.021866,1150.68626,174.342064
3,400115,1294.0,7457.0,4950.0,5441.0,170.0,13.488104,6.226652,7.879582,6.882826,33.585426,472.298952,1256.465467,1055.456056,1013.391666,154.501103
4,400116,3214.0,11839.0,5055.0,6088.0,1700.0,8.558017,3.735452,7.439994,5.348041,14.449678,744.304832,1196.712881,1017.713521,881.051235,664.720207
5,400117,2004.0,7273.0,4977.0,9205.0,428.0,12.060488,6.888629,8.458761,5.020972,17.339721,654.025086,1355.745615,1139.216362,1250.671847,200.824959
6,400118,3393.0,11215.0,6586.0,9726.0,2527.0,8.686488,4.583531,6.21207,4.965147,9.942171,797.553614,1391.013182,1107.10767,1306.766946,679.858121
7,400119,1650.0,4104.0,1752.0,2789.0,1059.0,13.206806,6.235256,12.648417,11.141156,16.43398,589.676115,692.458007,599.655864,840.834634,470.945358
8,400120,427.0,1148.0,4282.0,6416.0,232.0,26.227556,21.773629,8.59795,6.303173,42.425326,303.052243,676.401417,996.261558,1094.347826,266.345262
9,400121,1622.0,4668.0,2184.0,1867.0,454.0,12.660056,7.201827,13.277579,12.937766,28.664463,555.671711,909.714932,784.699399,653.635166,352.153065


In [26]:
table_2.to_excel('output/pums_rent_revised.xlsx')