# PUMS Household Income vs. AMI (2012) in Phoenix PUMAs

- https://www.census.gov/data/developers/data-sets/

Used to tabulate households by income and household size, for comparison with the HUD AMI limits for the same year:
-  https://api.census.gov/data/2012/acs/acs1/pums/variables.html

2012 AMI by HH Size (from State of Arizona)
- https://www.huduser.gov/portal/datasets/il/il12/AZ.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
# Search parameters: which ACS PUMS release to pull and which PUMAs to keep
y1 = '2012'
sample = 'acs1'

# GEO_IDs of the PUMAs kept for the analysis (2-digit state + 5-digit PUMA code)
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122', '0400123',
    '0400125', '0400128', '0400112', '0400129',
]

#north_pumas = ['0400112','0400129']

# PUMS variables to request, sent as a single comma-separated string
data_cols = ','.join([
    'SERIALNO', 'ST', 'PUMA', 'HINCP', 'NP', 'WGTP', 'ADJINC', 'ADJHSG', 'GRNTP',
])

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

HINCP - household income (HHI) in the past 12 months. Codes: -60000 = N/A and 0 = no income (both are excluded from this analysis); -59999 = loss of 59k or more;
-1 to -59998 = loss; 1+ = income
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

GRNTP - monthly gross rent. 0 = N/A (not paying rent, or owned); these records are excluded from this analysis
https://api.census.gov/data/2021/acs/acs1/pums/variables/GRNTP.json

In [4]:
# 2012 AMI income limits, keyed by household-size label ('1' through '8').
# Each dict holds the annual income ceiling for one AMI band.
AMI_30pct = {str(size): limit for size, limit in enumerate(
    (13950, 15950, 17950, 19900, 21500, 23100, 24700, 26300), start=1)}
AMI_50pct = {str(size): limit for size, limit in enumerate(
    (23250, 26600, 29900, 33200, 35900, 38550, 41200, 43850), start=1)}
AMI_80pct = {str(size): limit for size, limit in enumerate(
    (37200, 42500, 47800, 53100, 57350, 61600, 65850, 70100), start=1)}
AMI_100pct = {str(size): limit for size, limit in enumerate(
    (46500, 53200, 59800, 66400, 71800, 77100, 82400, 87700), start=1)}

In [5]:
# Monthly housing-cost bin edges by household-size label, used with pd.cut to
# label the AMI band a unit is affordable to. Each list reads
#   [0, 30% AMI, 50% AMI, 80% AMI, 100% AMI, open-ended upper bound]
# and the four interior edges equal the matching AMI income limit * 0.33 / 12,
# rounded to whole dollars.
# The 80% edge for 7-person households used to be 1806, the only value that did
# not follow that rule (65850 * 0.33 / 12 = 1810.9); it is corrected to 1811.
unit_afford = {
    '1': [0, 384, 639, 1023, 1279, 1000000000],
    '2': [0, 439, 732, 1169, 1463, 1000000000],
    '3': [0, 494, 822, 1315, 1645, 1000000000],
    '4': [0, 547, 913, 1460, 1826, 1000000000],
    '5': [0, 591, 987, 1577, 1975, 1000000000],
    '6': [0, 635, 1060, 1694, 2120, 1000000000],
    '7': [0, 679, 1133, 1811, 2266, 1000000000],
    '8': [0, 723, 1206, 1928, 2412, 1000000000],
}

# Labels for the five bins defined by each edge list, lowest to highest
inc_lbls = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami', 'o100_ami']

In [6]:
# Column names of the 80 replicate weights: WGTP1 ... WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{n}' for n in range(1, 81)]

## Get PUMA data

In [7]:
# Pull the PUMS records through the project helper, passing the sample, year and
# requested variable list set in the search-parameters cell.
# NOTE(review): later cells read replicate-weight columns (WGTP1..WGTP80) that are
# not listed in data_cols — presumably get_puma adds them; confirm in get_pums.
df = get.get_puma(sample,y1,data_cols)

In [8]:
# GEO_ID = zero-padded 2-character state code + zero-padded 5-character PUMA code
df['GEO_ID'] = df['ST'].astype(str).str.zfill(2) + df['PUMA'].astype(str).str.zfill(5)

# Keep only the Phoenix PUMAs and discard the identifier columns no longer needed
df = df.loc[df['GEO_ID'].isin(phx_pumas)].drop(columns=['SERIALNO', 'ST', 'PUMA'])

# GEO_ID goes first; every other column is cast to float
value_cols = [name for name in df.columns if name != 'GEO_ID']
df = df[['GEO_ID'] + value_cols].astype({name: float for name in value_cols})

In [9]:
# Bucket the household's person count (NP) into size labels '1'..'8'; the last
# bin, (7, 20], collects every household of 8 or more. NP values outside (0, 20]
# receive no label.
# The labels are cast to object rather than str: astype(str) would turn a missing
# label into the literal string 'nan', so the later missing-value filter on HHSz
# could never drop those rows.
df['HHSz'] = pd.cut(
    df['NP'],
    bins=[0, 1, 2, 3, 4, 5, 6, 7, 20],
    labels=['1', '2', '3', '4', '5', '6', '7', '8'],
).astype(object)

In [10]:
# Keep records that have a household size, an income other than the
# -60000 (N/A) and 0 (no income) codes, and a non-zero gross rent
keep = df['HHSz'].notna() & ~df['HINCP'].isin([-60000, 0]) & df['GRNTP'].ne(0)
dff = df.loc[keep].copy()

# Scale income by ADJINC, which is stored as an integer with 6 implied decimals
dff['HINCP'] = (dff['ADJINC'] / 1000000) * dff['HINCP']

In [11]:
# Actual monthly housing cost: the gross rent
dff['hou_cost'] = dff['GRNTP']

# Affordable monthly cost: 33.3% of adjusted annual income divided over 12 months;
# households whose income is below 1 are assigned 0.
# NOTE(review): the share here is 0.333, while the unit_afford rent cut-offs line
# up with 0.33 of the AMI limits — confirm which share is intended.
dff['aff_cost'] = np.where(
    dff['HINCP'] >= 1,
    (dff['HINCP'] * 0.333) / 12,
    0,
)

In [12]:
# Assign each household an AMI band: look up the income limits for its size label
# and take the first band whose upper limit is at or above HINCP. Anything not
# covered by the four limits falls through to 'o100_ami'.
hh_income = dff['HINCP']
hh_size = dff['HHSz']
dff['AMI_range'] = np.select(
    [
        hh_income <= hh_size.map(AMI_30pct),
        hh_income <= hh_size.map(AMI_50pct),
        hh_income <= hh_size.map(AMI_80pct),
        hh_income <= hh_size.map(AMI_100pct),
    ],
    ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami'],
    default='o100_ami',
)

In [13]:
# A household is 'burdened' when its actual housing cost exceeds its affordable cost
pays_too_much = dff['hou_cost'].gt(dff['aff_cost'])
dff['cost_burdened'] = np.where(pays_too_much, 'burdened', 'not burdened')

In [14]:
# Label each unit with the AMI band its rent is affordable to. The rent is binned
# with the edge list that belongs to the household's size label; a record whose
# size label has no edge list gets an empty string.
size_matches = [dff['HHSz'] == size for size in unit_afford]
rent_bands = [
    np.asarray(pd.cut(dff['hou_cost'], bins=edges, labels=inc_lbls), dtype=object)
    for edges in unit_afford.values()
]
dff['unit_aff'] = np.select(size_matches, rent_bands, default='')

In [15]:
# Preview the first three records to eyeball the derived columns
dff.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,ADJHSG,GRNTP,WGTP1,WGTP2,WGTP3,...,WGTP77,WGTP78,WGTP79,WGTP80,HHSz,hou_cost,aff_cost,AMI_range,cost_burdened,unit_aff
22,400116,40408.28,1.0,63.0,1010207.0,1000000.0,730.0,16.0,18.0,108.0,...,17.0,107.0,105.0,62.0,1,730.0,1121.32977,80_100_ami,not burdened,50_80_ami
29,400122,18587.8088,2.0,49.0,1010207.0,1000000.0,640.0,13.0,12.0,47.0,...,13.0,82.0,49.0,84.0,2,640.0,515.811694,30_50_ami,burdened,30_50_ami
98,400122,28083.7546,2.0,75.0,1010207.0,1000000.0,600.0,75.0,127.0,130.0,...,90.0,62.0,128.0,130.0,2,600.0,779.32419,50_80_ami,not burdened,30_50_ami


### table by PUMA for renters by AMI range - cost burdened vs. not cost burdened

In [16]:
def make_est(df):
    """Add reliability columns to a table of weighted household totals.

    For every row, ``WGTP`` is the estimate and the columns listed in ``repwts``
    are its replicate estimates. Three columns are computed with the project's
    ``calc`` helpers: ``hh_SE`` (``get_se``), ``hh_MOE`` (``get_moe``) and
    ``hh_CV`` (``get_cv``) — presumably standard error, margin of error and
    coefficient of variation, judging by the helper names.

    The frame is changed in place (the three columns are added and ``WGTP`` is
    renamed to ``hh``) and that same frame is returned.
    """
    def _row_se(row):
        return calc.get_se(row['WGTP'], row[repwts])

    def _row_moe(row):
        return calc.get_moe(row['hh_SE'])

    def _row_cv(row):
        return calc.get_cv(row['WGTP'], row['hh_SE'])

    df['hh_SE'] = df.apply(_row_se, axis=1)
    df['hh_MOE'] = df.apply(_row_moe, axis=1)
    df['hh_CV'] = df.apply(_row_cv, axis=1)

    # Renamed last: the helpers above still read the estimate as 'WGTP'
    df.rename(columns={'WGTP': 'hh'}, inplace=True)
    return df

In [17]:
# Record-level columns that are removed before the weights are aggregated
drop_cols = [
    'HINCP', 'NP', 'ADJINC', 'ADJHSG', 'GRNTP',
    'hou_cost', 'aff_cost', 'HHSz',
]

In [18]:
# Independent working copy of dff without the record-level columns
table = dff.drop(columns=drop_cols).copy()

In [19]:
# Sum the remaining columns (the WGTP weight and its replicates) within each
# PUMA / AMI band / affordability band / burden-status combination
table = (
    table
    .groupby(['GEO_ID', 'AMI_range', 'unit_aff', 'cost_burdened'])
    .sum()
    .reset_index()
)

In [20]:
# Full cross-tab: PUMA x AMI band x affordability band x burden status.
# Estimates get their SE/MOE/CV columns, then the replicate weights are dropped.
table_2 = (
    table
    .groupby(['GEO_ID', 'AMI_range', 'unit_aff', 'cost_burdened'])
    .sum()
    .reset_index()
)
table_2 = make_est(table_2).drop(columns=repwts)

In [21]:
# Renter households by PUMA and household AMI band
rent_ami = (
    table
    .drop(columns=['unit_aff', 'cost_burdened'])
    .groupby(['GEO_ID', 'AMI_range'])
    .sum()
    .reset_index()
)
rent_ami = make_est(rent_ami).drop(columns=repwts)

In [22]:
# Renter households by PUMA and the AMI band their unit is affordable to
rent_aff = (
    table
    .drop(columns=['AMI_range', 'cost_burdened'])
    .groupby(['GEO_ID', 'unit_aff'])
    .sum()
    .reset_index()
)
rent_aff = make_est(rent_aff).drop(columns=repwts)

In [23]:
# Renter households by PUMA and cost-burden status
cost_burd = (
    table
    .drop(columns=['unit_aff', 'AMI_range'])
    .groupby(['GEO_ID', 'cost_burdened'])
    .sum()
    .reset_index()
)
cost_burd = make_est(cost_burd).drop(columns=repwts)

In [24]:
# Renter households by PUMA, cost-burden status and household AMI band
cost_burd_ami = (
    table
    .drop(columns=['unit_aff'])
    .groupby(['GEO_ID', 'cost_burdened', 'AMI_range'])
    .sum()
    .reset_index()
)
cost_burd_ami = make_est(cost_burd_ami).drop(columns=repwts)

In [25]:
# Renter households by PUMA, household AMI band and unit affordability band
table_3 = (
    table
    .drop(columns='cost_burdened')
    .groupby(['GEO_ID', 'AMI_range', 'unit_aff'])
    .sum()
    .reset_index()
)
table_3 = make_est(table_3).drop(columns=repwts)

In [26]:
# Reshape each long estimate table to one row per PUMA, spreading the category
# columns across the header. Every table was already grouped on GEO_ID plus the
# same category columns, so each pivot cell holds a single row and the sum simply
# carries that value through.
# aggfunc is the string 'sum' rather than np.sum: recent pandas releases warn when
# a NumPy function is passed here and recommend the string to keep this behaviour.
table_2 = pd.pivot_table(table_2, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                         columns=['AMI_range', 'unit_aff', 'cost_burdened'],
                         aggfunc='sum').reset_index()

rent_ami = pd.pivot_table(rent_ami, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                          columns=['AMI_range'], aggfunc='sum').reset_index()

rent_aff = pd.pivot_table(rent_aff, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                          columns=['unit_aff'], aggfunc='sum').reset_index()

cost_burd = pd.pivot_table(cost_burd, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                           columns=['cost_burdened'], aggfunc='sum').reset_index()

cost_burd_ami = pd.pivot_table(cost_burd_ami, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                               columns=['AMI_range', 'cost_burdened'],
                               aggfunc='sum').reset_index()

In [27]:
# One row per PUMA with household AMI band x unit affordability band across the
# header. table_3 was grouped on exactly these keys, so each cell holds a single
# row. aggfunc is the string 'sum' because recent pandas releases warn when
# np.sum is passed here and recommend the string to keep this behaviour.
table_3 = pd.pivot_table(table_3, values=['hh', 'hh_MOE', 'hh_CV'], index='GEO_ID',
                         columns=['AMI_range', 'unit_aff'], aggfunc='sum').reset_index()

In [28]:
# Write every summary table to its own sheet of a single workbook.
# The target folder is created first because the write fails if it is missing.
os.makedirs('output', exist_ok=True)

sheets = {
    'full_renter': table_2,
    'hh_ami_lvl_renter': rent_ami,
    'hh_afford_byAMI_renter': rent_aff,
    'cost_burdened_renter': cost_burd,
    'cost_burdened_byAMI_renter': cost_burd_ami,
    'demand_supply_amilvl': table_3,
}
with pd.ExcelWriter('output/pums_grossrent_gap_2012.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)