# PUMS Vacant Rental Units (for gap analysis)
- https://www.census.gov/data/developers/data-sets/
-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

Note - vacant units for rent have no financial data (no rent cost, no utility cost), but this notebook checks those variables

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
# Search parameters: survey year and ACS sample passed to the PUMS loader
y1 = '2021'
# y0 = '2013'

sample = 'acs1'

# State + PUMA ids used to filter the records down to the Phoenix PUMAs
phx_pumas = [
    '0400113', '0400114', '0400115', '0400116', '0400117',
    '0400118', '0400119', '0400120', '0400121', '0400122', '0400123',
    '0400125', '0400128', '0400112', '0400129',
]

# north_pumas = ['0400112','0400129']

# Comma-separated list of PUMS variables to request
data_cols = 'SERIALNO,ST,PUMA,WGTP,GRNTP,GASP,FULP,WATP,ELEP,VACS'

ADJHSG - adjustment factor for housing dollar amounts (6 decimal places)
1000000 = 1.000000

GRNTP - gross rent (monthly) (not 0: n/a,not paying rent, owned)
https://api.census.gov/data/2021/acs/acs1/pums/variables/GRNTP.json

GASP - gas cost (monthly) (not 3:N/a, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/GASP.json

FULP - fuel cost (yearly) (not 2:n/a, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/FULP.json

WATP - water cost (yearly) (not 2:n/a,included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/WATP.json

ELEP - electricity cost (monthly) (not 2: N/A, included in rent/condo)
https://api.census.gov/data/2021/acs/acs1/pums/variables/ELEP.json

VACS - vacancy status (1: for rent, 2: rented, not occupied)
https://api.census.gov/data/2021/acs/acs1/pums/variables/VACS.json

In [4]:
# Housing costs affordable to different AMI bands based on household size.
# One dict per household size (1-8 persons); keys are the AMI band upper limits.
aff_1p = {'30pct': 461, '50pct': 767, '80pct': 1217, '100pct': 1535}
aff_2p = {'30pct': 527, '50pct': 877, '80pct': 1392, '100pct': 1754}
aff_3p = {'30pct': 609, '50pct': 985, '80pct': 1565, '100pct': 1973}
aff_4p = {'30pct': 735, '50pct': 1096, '80pct': 1738, '100pct': 2192}
aff_5p = {'30pct': 861, '50pct': 1185, '80pct': 1878, '100pct': 2370}
aff_6p = {'30pct': 987, '50pct': 1272, '80pct': 2017, '100pct': 2545}
aff_7p = {'30pct': 1113, '50pct': 1360, '80pct': 2156, '100pct': 2720}
aff_8p = {'30pct': 1239, '50pct': 1447, '80pct': 2295, '100pct': 2894}

# Cut points per household size: 0, the four AMI limits, then a very large
# value as an open-ended top edge (six edges for the five labels in inc_lbls;
# presumably used as bin edges - confirm against the consumer).
# Derived from the aff_* dicts so the two tables cannot drift apart: the
# hand-typed version carried 1535 (the 1-person limit) as the 100% AMI edge
# for 2-person households instead of 1754.
unit_afford = {
    str(size): [0, bands['30pct'], bands['50pct'], bands['80pct'],
                bands['100pct'], 1000000000]
    for size, bands in enumerate(
        (aff_1p, aff_2p, aff_3p, aff_4p, aff_5p, aff_6p, aff_7p, aff_8p),
        start=1,
    )
}

inc_lbls = ['u30_ami', '30_50_ami', '50_80_ami', '80_100_ami', 'o100_ami']

In [5]:
# Column names of the 80 replicate weights: WGTP1 ... WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{n}' for n in range(1, 81)]

## Get PUMA data

In [6]:
# Load the PUMS records through the project helper, passing the sample, year
# and comma-separated column list set in the search parameters.
df = get.get_puma(sample,y1,data_cols)

In [7]:
# Combine the state and PUMA codes into one id and keep only the Phoenix PUMAs.
df['GEO_ID'] = df['ST'] + df['PUMA']
df = df.loc[df['GEO_ID'].isin(phx_pumas)]
df = df.drop(columns=['SERIALNO', 'ST', 'PUMA'])
# Put the id first, then cast every remaining column to float.
df = df[['GEO_ID', *(c for c in df.columns if c != 'GEO_ID')]]
for col in df.columns[1:]:
    df[col] = df[col].astype(float)

In [12]:
# Work on a copy so the derived cost columns added below do not modify df.
# The trailing comment is a disabled filter that would have dropped rows with GRNTP == 0.
dff = df.copy() #[(df.GRNTP!=0)]

In [13]:
# Recode the "n/a / included in rent" codes to 0. Fuel and water are yearly
# amounts, so divide them by 12 to put every utility on a monthly basis.
dff['gas'] = np.where(dff['GASP'].eq(3), 0, dff['GASP'])
dff['fuel'] = np.where(dff['FULP'].eq(2), 0, dff['FULP'].div(12))
dff['water'] = np.where(dff['WATP'].eq(2), 0, dff['WATP'].div(12))
dff['elec'] = np.where(dff['ELEP'].eq(2), 0, dff['ELEP'])

# Housing cost = gross rent plus the four utility components.
dff['utilities'] = dff['gas'] + dff['fuel'] + dff['water'] + dff['elec']
dff['hou_cost'] = dff['GRNTP'] + dff['utilities']

In [17]:
# Keep only units whose vacancy status is 1 (for rent) or 2 (rented, not occupied).
dff = dff[dff['VACS'].isin([1.0, 2.0])].copy()

In [19]:
# Preview the first rows of the vacancy-filtered frame.
dff.head()

Unnamed: 0,GEO_ID,WGTP,ADJHSG,GRNTP,GASP,FULP,WATP,ELEP,VACS,WGTP1,...,WGTP77,WGTP78,WGTP79,WGTP80,gas,fuel,water,elec,utilities,hou_cost
3738,400114,148.0,1000000.0,0.0,3.0,2.0,2.0,2.0,1.0,268.0,...,172.0,149.0,146.0,258.0,0.0,0.0,0.0,0.0,0.0,0.0
4104,400128,153.0,1000000.0,0.0,3.0,2.0,2.0,2.0,1.0,260.0,...,45.0,43.0,255.0,161.0,0.0,0.0,0.0,0.0,0.0,0.0
4389,400119,89.0,1000000.0,0.0,3.0,2.0,2.0,2.0,1.0,152.0,...,151.0,91.0,89.0,26.0,0.0,0.0,0.0,0.0,0.0,0.0
5062,400121,139.0,1000000.0,0.0,3.0,2.0,2.0,2.0,1.0,37.0,...,147.0,142.0,40.0,237.0,0.0,0.0,0.0,0.0,0.0,0.0
5085,400123,246.0,1000000.0,0.0,3.0,2.0,2.0,2.0,1.0,251.0,...,277.0,277.0,231.0,77.0,0.0,0.0,0.0,0.0,0.0,0.0


### table by PUMA of vacant rental units (for rent, or rented but not occupied)

In [21]:
def make_est(df):
    """Add reliability columns for the ``WGTP`` estimate and rename it to ``hh``.

    For every row, ``WGTP`` and the replicate-weight columns named in
    ``repwts`` are passed to ``calc.get_se``; the result is stored as
    ``hh_SE``. ``hh_MOE`` comes from ``calc.get_moe(hh_SE)`` and ``hh_CV``
    from ``calc.get_cv(WGTP, hh_SE)``.

    The frame is modified in place (three new columns, ``WGTP`` renamed to
    ``hh``) and the same object is returned.
    """
    def _row_se(row):
        return calc.get_se(row['WGTP'], row[repwts])

    def _row_moe(row):
        return calc.get_moe(row['hh_SE'])

    def _row_cv(row):
        return calc.get_cv(row['WGTP'], row['hh_SE'])

    # Order matters: the MOE and CV helpers read the hh_SE column created first.
    df['hh_SE'] = df.apply(_row_se, axis=1)
    df['hh_MOE'] = df.apply(_row_moe, axis=1)
    df['hh_CV'] = df.apply(_row_cv, axis=1)

    df.rename(columns={'WGTP': 'hh'}, inplace=True)
    return df

In [30]:
# Columns removed before aggregating: the raw cost inputs, the vacancy code
# and the derived cost fields.
drop_cols = [
    'ADJHSG', 'GRNTP', 'GASP', 'FULP', 'WATP', 'ELEP',
    'gas', 'fuel', 'water', 'elec', 'utilities', 'VACS', 'hou_cost',
]

In [31]:
# Start the summary table from a copy of dff without the columns listed in drop_cols.
table = dff.copy().drop(columns=drop_cols)

In [32]:
# Sum every remaining column within each PUMA, keeping GEO_ID as a regular column.
table = table.groupby('GEO_ID', as_index=False).sum()

In [34]:
# Add the SE / MOE / CV columns, then discard the replicate weights they were computed from.
table = make_est(table).drop(columns=repwts)

In [36]:
# Display the per-PUMA estimates.
table

Unnamed: 0,GEO_ID,hh,hh_SE,hh_MOE,hh_CV
0,400112,690.0,314.600858,517.518412,27.716916
1,400113,830.0,288.047826,473.838674,21.096995
2,400114,1051.0,281.538008,463.130023,16.284275
3,400115,1202.0,272.551371,448.347006,13.784087
4,400116,1327.0,266.317386,438.0921,12.20008
5,400117,310.0,153.101274,251.851595,30.022801
6,400118,3163.0,500.29119,822.979008,9.615188
7,400119,359.0,133.53829,219.670488,22.612338
8,400120,736.0,239.168717,393.432539,19.754255
9,400121,276.0,119.569122,196.691206,26.335651


In [37]:
# Write the per-PUMA estimates to Excel. Create the output folder first so the
# export does not fail when 'output/' does not exist yet.
os.makedirs('output', exist_ok=True)
table.to_excel('output/pums_vacant_forrent.xlsx',index=False)