# Phoenix Households by Building Size and Tenure (i.e., vacant or occupied) - Multifamily vs. Single Family - PUMS data

To determine occupancy/vacancy rate for single-family and multifamily units by PUMA [IF POSSIBLE!]

- https://www.census.gov/data/developers/data-sets/

-  https://api.census.gov/data/2021/acs/acs1/pums/variables.html

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_pums as get
import pums as calc

In [3]:
from AMI_BANDS_2021 import *
#from AMI_BANDS_2012 import *

year = '2021'

In [4]:
#parameters
sample = 'acs1'

phx_pumas = ['0400112','0400113','0400114','0400115','0400116','0400117',
             '0400118','0400119','0400120','0400121','0400122','0400123',
             '0400125','0400128','0400129']

data_cols = 'SERIALNO,ST,PUMA,WGTP,BLD,VACS'

In [5]:
bld_recode = {1.0:'sf',2.0:'sf',3.0:'sf',4.0:'mf',5.0:'mf',
             6.0:'mf',7.0:'mf',8.0:'mf',9.0:'mf',10.0:'oth',0.0:'na'}

Vacancy variables from Census API
https://api.census.gov/data/2021/acs/acs1/pums/variables/VACS.json

"3": "For sale only",
      "0": "N/A (GQ/occupied)",
      "2": "Rented, not occupied",
      "6": "For migrant workers",
      "4": "Sold, not occupied",
      "7": "Other vacant",
      "5": "For seasonal/recreational/occasional use",
      "1": "For Rent"

In [6]:
vacancy_recode = {0.0:'occ',1.0:'vac',2.0:'vac',3.0:'vac',4.0:'vac',5.0:'vac',6.0:'vac',7.0:'vac'}

In [7]:
# create a list of replicate weights
repwt = 'WGTP'
repwts = [repwt+str(i) for i in range(1, 81)]

## Get PUMA data

In [8]:
raw = get.get_puma(sample,year,data_cols)

In [9]:
df = raw.copy()

In [10]:
df['ST'] = df['ST'].str.zfill(2)
df['PUMA'] = df['PUMA'].str.zfill(5)
df['GEO_ID'] = df['ST']+df['PUMA']

In [11]:
#make geo identifiers, reduce df to PHX PUMAs
df['GEO_ID'] = df['ST']+df['PUMA']
df = df[df.GEO_ID.isin(phx_pumas)]
df  = df.drop(['SERIALNO','ST','PUMA'],axis=1)
df = df[['GEO_ID']+[col for col in df.columns if col !='GEO_ID']] #move id to first col

#clean data cols to be numeric
for col in df.columns[1:]: df[col] = df[col].astype(float)

In [12]:
df['BSize'] = df['BLD'].map(bld_recode)
df['Occ'] = df['VACS'].map(vacancy_recode)

In [13]:
df.Occ.unique()

array(['occ', 'vac'], dtype=object)

In [14]:
df.BSize.unique()

array(['na', 'mf', 'sf', 'oth'], dtype=object)

In [15]:
df.Occ.value_counts()

occ    7142
vac     347
Name: Occ, dtype: int64

In [16]:
df.BSize.value_counts()

sf     5195
mf     1663
na      622
oth       9
Name: BSize, dtype: int64

In [19]:
dff = df.copy()

dff = df[~(df.HHSz.isna())].copy()

In [20]:
pivot = dff.drop(['BLD','VACS'],axis=1).copy()
pivot = pivot.groupby(['GEO_ID','Occ','BSize']).sum().reset_index()
pivot.rename(columns={'WGTP':'hh_e'},inplace=True)

In [21]:
pivot['hh_SE'] = pivot.apply(lambda x: (calc.get_se(x['hh_e'],x[repwts])),axis=1)
pivot['hh_MOE'] = pivot.apply(lambda x: (calc.get_moe(x['hh_SE'])),axis=1)
pivot['hh_CV'] = pivot.apply(lambda x: (calc.get_cv(x['hh_e'],x['hh_SE'])),axis=1)
pivot = pivot.drop(columns=repwts)

In [23]:
pivot.head(12)

Unnamed: 0,GEO_ID,Occ,BSize,hh_e,hh_SE,hh_MOE,hh_CV
0,400112,occ,mf,9264.0,775.466585,1275.642533,5.088604
1,400112,occ,na,0.0,0.0,0.0,0.0
2,400112,occ,sf,48993.0,1311.516698,2157.444969,1.627323
3,400112,vac,mf,2424.0,541.14499,890.183508,13.571102
4,400112,vac,sf,5709.0,672.579215,1106.392808,7.161723
5,400113,occ,mf,10631.0,804.731757,1323.78374,4.601624
6,400113,occ,na,0.0,0.0,0.0,0.0
7,400113,occ,oth,269.0,139.12117,228.854325,31.439457
8,400113,occ,sf,32232.0,1149.53409,1890.983579,2.168047
9,400113,vac,mf,2386.0,490.791402,807.351856,12.504335


In [24]:
final = pd.pivot_table(pivot,values=['hh_e','hh_MOE','hh_CV'],index='GEO_ID',\
                      columns=['Occ','BSize'],aggfunc=np.sum).reset_index()

In [25]:
final.head(10)

Unnamed: 0_level_0,GEO_ID,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_e,hh_e,hh_e,hh_e,hh_e,hh_e
Occ,Unnamed: 1_level_1,occ,occ,occ,occ,vac,vac,occ,occ,occ,occ,vac,vac,occ,occ,occ,occ,vac,vac
BSize,Unnamed: 1_level_2,mf,na,oth,sf,mf,sf,mf,na,oth,sf,mf,sf,mf,na,oth,sf,mf,sf
0,400112,5.088604,0.0,,1.627323,13.571102,7.161723,1275.642533,0.0,,2157.444969,890.183508,1106.392808,9264.0,0.0,,48993.0,2424.0,5709.0
1,400113,4.601624,0.0,31.439457,2.168047,12.504335,10.465499,1323.78374,0.0,228.854325,1890.983579,807.351856,677.69523,10631.0,0.0,269.0,32232.0,2386.0,2393.0
2,400114,3.916574,0.0,,1.971734,14.058544,13.285215,1668.497924,0.0,,1699.323099,594.988962,490.00019,15743.0,0.0,,31849.0,1564.0,1363.0
3,400115,3.747735,0.0,,2.921256,11.446379,19.266046,1573.14417,0.0,,2256.875284,549.172354,526.557472,15512.0,0.0,,28550.0,1773.0,1010.0
4,400116,3.083457,0.0,42.164872,2.379116,9.298747,12.347473,2090.900641,0.0,261.287162,1634.401752,820.55376,483.479884,25059.0,0.0,229.0,25387.0,3261.0,1447.0
5,400117,2.941089,0.0,,2.404474,8.931555,9.418352,1697.184336,0.0,,1736.863047,666.581321,804.857206,21325.0,0.0,,26694.0,2758.0,3158.0
6,400118,2.42998,0.0,,2.706494,7.625012,10.426155,1875.554615,0.0,,1528.924916,926.236565,631.698849,28523.0,0.0,,20876.0,4489.0,2239.0
7,400119,6.152877,0.0,,1.974457,21.690076,11.983551,974.015599,0.0,,1580.598766,316.36006,489.983898,5850.0,0.0,,29583.0,539.0,1511.0
8,400120,6.108457,0.0,,1.885597,16.367664,16.706809,1381.877597,0.0,,1760.710498,499.163046,650.10605,8360.0,0.0,,34507.0,1127.0,1438.0
9,400121,7.476878,0.0,43.034017,1.71295,43.76641,12.805096,844.914181,0.0,125.767216,1830.428319,116.064339,459.124554,4176.0,0.0,108.0,39489.0,98.0,1325.0


In [26]:
final.to_excel(f'../../output/acs/occ_bsize_pumas_{year[-2:]}.xlsx')