# PUMS Household Income vs. AMI (2021) in Phoenix  PUMAS

- https://www.census.gov/data/developers/data-sets/

For households by income and household size to compare to HUD AMI in same year
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

2021 AMI by HH Size (from City of Phoenix)
- https://www.phoenix.gov/humanservicessite/Documents/2021%20AMI%20Limits%204.2.21.pdf

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
from AMI_BANDS_2021 import *

In [4]:
#Search parameters
y1 = '2021'

sample = 'acs1'

phx_pumas = ['0400113','0400114','0400115','0400116','0400117',
             '0400118','0400119','0400120','0400121','0400122','0400123',
             '0400125','0400128','0400112','0400129']

data_cols = 'SERIALNO,ST,PUMA,HINCP,NP,WGTP,ADJINC,ADJHSG,SMOCP,HHLDRRAC1P,HHLDRHISP'

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

FHINCP - income flag - 1: yes | https://api.census.gov/data/2021/acs/acs1/pums/variables/FHINCP.json

HINCP - HHI in past 12 months (not -60000:n/a, 0:no income, -59999 loss of 59k+)
-1 to -59998 loss, 1+
https://api.census.gov/data/2021/acs/acs1/pums/variables/HINCP.json

SMOCP - selected owner costs (monthly) (not '00000': none / '-1')
https://api.census.gov/data/2021/acs/acs1/pums/variables/SMOCP.json

In [5]:
# create a list of replicate weights
repwt = 'WGTP'
repwts = [repwt+str(i) for i in range(1, 81)]

## Get PUMA data

In [6]:
df = get.get_puma(sample,y1,data_cols)

In [7]:
df['GEO_ID'] = df['ST']+df['PUMA']
df = df[df.GEO_ID.isin(phx_pumas)]
df  = df.drop(['SERIALNO','ST','PUMA'],axis=1)
df = df[['GEO_ID']+[col for col in df.columns if col != 'GEO_ID']] #move id to first col
for col in df.columns[1:]: df[col] = df[col].astype(float)

In [8]:
df['HHSz'] = pd.cut(df['NP'],bins=[0,1,2,3,4,5,6,7,14],
                   labels=['1','2','3','4','5','6','7','8'])
df['HHSz'] = df['HHSz'].astype(str)
df['HINCP'] = df.ADJINC * df.HINCP

In [9]:
df['White_Alone']=np.where((df['HHLDRHISP']==1)&(df['HHLDRRAC1P']==1),'WhiteAlone','Other')

In [10]:
df.head(3)

Unnamed: 0,GEO_ID,HINCP,NP,WGTP,ADJINC,ADJHSG,SMOCP,HHLDRRAC1P,HHLDRHISP,WGTP1,...,WGTP73,WGTP74,WGTP75,WGTP76,WGTP77,WGTP78,WGTP79,WGTP80,HHSz,White_Alone
4,400119,-61795.68,1.0,0.0,1.029928,1000000.0,-1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Other
33,400129,-61795.68,1.0,0.0,1.029928,1000000.0,-1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Other
38,400120,-61795.68,1.0,0.0,1.029928,1000000.0,-1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,Other


In [15]:
#identify the records that are renter households, but with no other info
#will be included in supply but not demand
bad_records = df[(df.HHSz.isna())&(df.HINCP<0)|(df.HINCP==0)&(df.SMOCP==0)|(df.HINCP==0)&(df.SMOCP==-1)].copy()
print(f'there are {bad_records.WGTP.sum()} households with no size,income, or housing cost information') 

there are 5165.0 households with no size,income, or housing cost information


In [16]:
df_supply = df[(df.SMOCP!=0)|(df.SMOCP!=-1)].copy()

In [19]:
df_demand = df[~(df.HHSz.isna())&(df.HINCP>-60000)&(df.HINCP!=0)&(df.SMOCP!=0)&(df.SMOCP!=-1)].copy()

In [20]:
dfs = [df_supply,df_demand]

In [21]:
for frame in dfs:
    frame['hou_cost'] = frame.SMOCP
    frame['aff_cost'] = np.where(frame.HINCP>=1,(frame.HINCP*0.333)/12,0)
    
    #who is the unit affordable to based on the rent
    frame['unit_aff'] = np.where(frame.HHSz=='1',pd.cut(frame['hou_cost'],bins=affordable_rent['1'],labels=inc_lbl),
                  np.where(frame.HHSz=='2',pd.cut(frame['hou_cost'],bins=affordable_rent['2'],labels=inc_lbl),
                  np.where(frame.HHSz=='3',pd.cut(frame['hou_cost'],bins=affordable_rent['3'],labels=inc_lbl),
                  np.where(frame.HHSz=='4',pd.cut(frame['hou_cost'],bins=affordable_rent['4'],labels=inc_lbl),
                  np.where(frame.HHSz=='5',pd.cut(frame['hou_cost'],bins=affordable_rent['5'],labels=inc_lbl),
                  np.where(frame.HHSz=='6',pd.cut(frame['hou_cost'],bins=affordable_rent['6'],labels=inc_lbl),
                  np.where(frame.HHSz=='7',pd.cut(frame['hou_cost'],bins=affordable_rent['7'],labels=inc_lbl),
                  np.where(frame.HHSz=='8',pd.cut(frame['hou_cost'],bins=affordable_rent['8'],labels=inc_lbl),''))))))))

In [22]:
#assign AMI range based on household size and ami dictionaries
df_demand['AMI_range'] = np.where((df_demand['HINCP']<=df_demand['HHSz'].map(AMI_30pct)),inc_lbl[0],
                   np.where((df_demand['HINCP']>df_demand['HHSz'].map(AMI_30pct))&(df_demand['HINCP']<=df_demand['HHSz'].map(AMI_50pct)),inc_lbl[1],
                   np.where((df_demand['HINCP']>df_demand['HHSz'].map(AMI_50pct))&(df_demand['HINCP']<=df_demand['HHSz'].map(AMI_80pct)),
                   inc_lbl[2],np.where((df_demand['HINCP']>df_demand['HHSz'].map(AMI_80pct))&(df_demand['HINCP']<=df_demand['HHSz'].map(AMI_100pct)),
                   inc_lbl[3],np.where((df_demand['HINCP']>df_demand['HHSz'].map(AMI_100pct))&(df_demand['HINCP']<=df_demand['HHSz'].map(AMI_120pct)),
                                       inc_lbl[4],inc_lbl[5])))))

#assign cost burden
df_demand['cost_burdened'] = np.where(df_demand.hou_cost>df_demand.aff_cost,'burdened','not burdened')

### table by PUMA for renters by AMI range - cost burdened vs. not cost burdened

In [23]:
def make_est(df):
    df['hh_SE'] = df.apply(lambda x: (calc.get_se(x['WGTP'],x[repwts])),axis=1)
    df['hh_MOE'] = df.apply(lambda x: (calc.get_moe(x['hh_SE'])),axis=1)
    df['hh_CV'] = df.apply(lambda x: (calc.get_cv(x['WGTP'],x['hh_SE'])),axis=1)
    df.rename(columns={'WGTP':'hh'},inplace=True)
    return df

In [29]:
drop_cols_supply = ['HINCP','NP','ADJINC','ADJHSG','SMOCP','hou_cost','HHSz','HHLDRHISP','HHLDRRAC1P']
drop_cols_demand = drop_cols_supply + ['aff_cost']

In [30]:
df_d = df_demand.copy().drop(columns=drop_cols_demand)
df_d = df_d.groupby(['GEO_ID','AMI_range','unit_aff','cost_burdened','White_Alone']).sum().reset_index()

In [31]:
own = df_d.copy().groupby(['GEO_ID','AMI_range','unit_aff','cost_burdened','White_Alone']).sum().reset_index()
own = make_est(own)
own = own.drop(columns=repwts)

In [33]:
own_ami = df_d.copy().drop(columns=['unit_aff','cost_burdened']).groupby(['GEO_ID','AMI_range','White_Alone']).sum().reset_index()
own_ami = make_est(own_ami)
own_ami = own_ami.drop(columns=repwts)

In [36]:
own_aff = df_d.copy().drop(columns=['AMI_range','cost_burdened']).groupby(['GEO_ID','unit_aff','White_Alone']).sum().reset_index()
own_aff = make_est(own_aff)
own_aff = own_aff.drop(columns=repwts)

In [37]:
cost_burd = df_d.copy().drop(columns=['unit_aff','AMI_range']).groupby(['GEO_ID','cost_burdened','White_Alone']).sum().reset_index()
cost_burd = make_est(cost_burd)
cost_burd = cost_burd.drop(columns=repwts)

In [38]:
cost_burd_ami = df_d.copy().drop(columns=['unit_aff']).groupby(['GEO_ID','cost_burdened','AMI_range','White_Alone']).sum().reset_index()
cost_burd_ami = make_est(cost_burd_ami)
cost_burd_ami = cost_burd_ami.drop(columns=repwts)

In [39]:
ami_aff = df_d.copy().drop(columns='cost_burdened').groupby(['GEO_ID','AMI_range','unit_aff','White_Alone']).sum().reset_index()
ami_aff = make_est(ami_aff)
ami_aff = ami_aff.drop(columns=repwts)

In [40]:
own = pd.pivot_table(own,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['AMI_range','unit_aff','cost_burdened','White_Alone'],aggfunc=np.sum).reset_index()

own_ami = pd.pivot_table(own_ami,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['AMI_range','White_Alone'],aggfunc=np.sum).reset_index()

own_aff = pd.pivot_table(own_aff,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['unit_aff','White_Alone'],aggfunc=np.sum).reset_index()

cost_burd = pd.pivot_table(cost_burd,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['cost_burdened','White_Alone'],aggfunc=np.sum).reset_index()

cost_burd_ami = pd.pivot_table(cost_burd_ami,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['AMI_range','cost_burdened','White_Alone'],aggfunc=np.sum).reset_index()

ami_aff = pd.pivot_table(ami_aff,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',
                          columns=['AMI_range','unit_aff','White_Alone'],aggfunc=np.sum).reset_index()

##### supply table

In [41]:
df_s = df_supply.copy().drop(columns=drop_cols_supply)
df_s = df_s.groupby(['GEO_ID','unit_aff']).sum().reset_index()

In [42]:
supply_aff = df_s.copy().groupby(['GEO_ID','unit_aff']).sum().reset_index()
supply_aff = make_est(supply_aff)
supply_aff = supply_aff.drop(columns=repwts)

supply_aff = pd.pivot_table(supply_aff,values=['hh','hh_MOE','hh_CV'],index='GEO_ID',\
                          columns=['unit_aff'],aggfunc=np.sum).reset_index()

In [43]:
with pd.ExcelWriter(f'../../output/pums_gap_own/pums_own_gap_whitenonwhite_033124.xlsx') as writer:
    own.to_excel(writer,sheet_name='full_owner')
    own_ami.to_excel(writer,sheet_name='hh_ami_lvl_owner')
    own_aff.to_excel(writer,sheet_name='hh_afford_byAMI_owner')
    cost_burd.to_excel(writer,sheet_name='cost_burdened_owner')
    cost_burd_ami.to_excel(writer,sheet_name='cost_burdened_byAMI_owner')
    ami_aff.to_excel(writer,sheet_name='demand_supply_amilvl')
    supply_aff.to_excel(writer,sheet_name='supply_amilvl')  