# PUMS Overcrowding, Vacancy Status in Phoenix PUMAs

- https://www.census.gov/data/developers/data-sets/

In [29]:
import pandas as pd
import math
import numpy as np
import os

In [30]:
import getters
import get_pums as get
import pums as calc

In [31]:
# Query settings for the PUMS pull
y1 = '2021'
# y0 = '2013'

sample = 'acs1'  # presumably the ACS 1-year sample; confirm against get_pums

# PUMA identifiers are the state code ('04') followed by the 5-digit PUMA
# code, i.e. the same ST + PUMA concatenation used to build GEO_ID.
phx_pumas = ['04' + puma for puma in (
    '00113', '00114', '00115', '00116', '00117',
    '00118', '00119', '00120', '00121', '00122', '00123',
    '00125', '00128',
)]

north_pumas = ['04' + puma for puma in ('00112', '00129')]

# Comma-separated variable list sent with the request
data_cols = ','.join([
    'SERIALNO', 'ST', 'PUMA', 'HINCP', 'NP', 'VACS', 'BDSP', 'TEN', 'WGTP',
])

In [32]:
# Names of the 80 replicate-weight columns: WGTP1 through WGTP80
repwt = 'WGTP'
repwts = [f'{repwt}{n}' for n in range(1, 81)]

## Get PUMA data

In [33]:
# Pull the PUMS records for the chosen sample and year, requesting the
# variables listed in data_cols (get_puma is a project helper in get_pums).
# NOTE(review): the result is presumably a DataFrame that also carries the
# WGTP1..WGTP80 replicate weights used later; confirm in get_pums.
raw = get.get_puma(sample,y1,data_cols)

In [34]:
# Work on a copy so the raw pull stays untouched by the cleaning steps below
df = raw.copy()

In [35]:
# Build the PUMA identifier (state code + PUMA code) and keep only the
# records that fall in the north PUMAs.
df = df.assign(GEO_ID=df['ST'] + df['PUMA'])
df = df.loc[df['GEO_ID'].isin(north_pumas)]

# The record id and the two code columns are no longer needed once GEO_ID exists
df = df.drop(columns=['SERIALNO', 'ST', 'PUMA'])

# Put GEO_ID first, then cast every remaining column to integer in one pass
value_cols = df.columns.drop('GEO_ID').tolist()
df = df[['GEO_ID'] + value_cols]
df = df.astype({name: int for name in value_cols})

## Overcrowding

In [36]:
# Keep records that have a bedroom count and at least one person.
# NOTE(review): BDSP == -1 presumably marks "not applicable" records; confirm
# against the PUMS data dictionary.
has_bedrooms = df['BDSP'] != -1
has_people = df['NP'] > 0
oc = df.loc[has_bedrooms & has_people].copy()

# Bedroom category: studios (0 bedrooms) count as 1-bedrooms so a later
# people-per-bedroom ratio cannot divide by zero; 5 or more collapse to '5+'.
studio_as_one = np.where(oc['BDSP'] == 0, '1', oc['BDSP'])
oc['beds'] = np.where(oc['BDSP'] >= 5, '5+', studio_as_one)

# Household-size category: 5 or more people collapse to '5+'
oc['pp'] = np.where(oc['NP'] >= 5, '5+', oc['NP'])

# Split by tenure code: 1 and 2 are treated as owner occupied, 3 as rented
own = oc[oc['TEN'].isin([1, 2])]
rent = oc[oc['TEN'] == 3]

#### Phoenix PUMA total (north PUMAs)

In [37]:
# Owner-occupied households: total the household weight and every replicate
# weight within each (household size, bedroom count) cell.
px_o = (
    own.drop(columns=['GEO_ID', 'HINCP', 'VACS', 'BDSP', 'NP', 'TEN'])
    .groupby(['pp', 'beds'])
    .sum()
    .reset_index()
)

# Sampling error of each cell estimate, computed row by row from the
# replicate-weight totals with the project helpers in pums (imported as calc).
px_o['hh_SE'] = px_o.apply(lambda row: calc.get_se(row['WGTP'], row[repwts]), axis=1)
px_o['hh_MOE'] = px_o.apply(lambda row: calc.get_moe(row['hh_SE']), axis=1)
px_o['hh_CV'] = px_o.apply(lambda row: calc.get_cv(row['WGTP'], row['hh_SE']), axis=1)

# Replicate weights are only needed for the error calculation
px_o = px_o.drop(columns=repwts)

# Cross-tab with bedrooms down the rows and household size across the columns.
# Each (beds, pp) pair occurs once after the groupby, so the aggregation only
# reshapes the table. The string 'sum' is used instead of np.sum because
# pandas >= 2.1 emits a FutureWarning when a NumPy callable is passed here.
px_o_piv = pd.pivot_table(
    px_o,
    values=['WGTP', 'hh_MOE', 'hh_CV'],
    index='beds',
    columns=['pp'],
    aggfunc='sum',
).reset_index()

In [38]:
# Display the owner-occupied cross-tab (bedrooms by household size)
px_o_piv

Unnamed: 0_level_0,beds,WGTP,WGTP,WGTP,WGTP,WGTP,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
pp,Unnamed: 1_level_1,1,2,3,4,5+,1,2,3,4,5+,1,2,3,4,5+
0,1,811.0,539.0,,113.0,245.0,19.783364,12.962726,,43.110346,42.699373,434.162996,189.067508,,131.823171,283.086649
1,2,2535.0,5228.0,794.0,133.0,,8.936799,5.271502,15.630679,38.165185,,613.044128,745.764564,335.838242,137.357007,
2,3,7230.0,17121.0,4247.0,2921.0,1196.0,5.485147,3.453207,7.587137,8.723779,17.328636,1073.144909,1599.865637,871.950867,689.553558,560.824989
3,4,2936.0,15206.0,6600.0,5944.0,4169.0,8.551761,4.048419,5.786767,6.053491,8.300088,679.427916,1665.836096,1033.502981,973.680523,936.367683
4,5+,709.0,2929.0,1517.0,4225.0,2484.0,21.191658,9.222205,13.655617,7.865601,9.346546,406.57715,730.947101,560.568538,899.270625,628.252991


In [39]:
# Renter households: total the household weight and every replicate weight
# within each (household size, bedroom count) cell.
px_r = (
    rent.drop(columns=['GEO_ID', 'HINCP', 'VACS', 'BDSP', 'NP', 'TEN'])
    .groupby(['pp', 'beds'])
    .sum()
    .reset_index()
)

# Sampling error of each cell estimate, computed row by row from the
# replicate-weight totals with the project helpers in pums (imported as calc).
px_r['hh_SE'] = px_r.apply(lambda row: calc.get_se(row['WGTP'], row[repwts]), axis=1)
px_r['hh_MOE'] = px_r.apply(lambda row: calc.get_moe(row['hh_SE']), axis=1)
px_r['hh_CV'] = px_r.apply(lambda row: calc.get_cv(row['WGTP'], row['hh_SE']), axis=1)

# Replicate weights are only needed for the error calculation
px_r = px_r.drop(columns=repwts)

# Cross-tab with bedrooms down the rows and household size across the columns.
# Each (beds, pp) pair occurs once after the groupby, so the aggregation only
# reshapes the table. The string 'sum' is used instead of np.sum because
# pandas >= 2.1 emits a FutureWarning when a NumPy callable is passed here.
px_r_piv = pd.pivot_table(
    px_r,
    values=['WGTP', 'hh_MOE', 'hh_CV'],
    index='beds',
    columns=['pp'],
    aggfunc='sum',
).reset_index()

In [40]:
# Display the renter cross-tab (bedrooms by household size)
px_r_piv

Unnamed: 0_level_0,beds,WGTP,WGTP,WGTP,WGTP,WGTP,hh_CV,hh_CV,hh_CV,hh_CV,hh_CV,hh_MOE,hh_MOE,hh_MOE,hh_MOE,hh_MOE
pp,Unnamed: 1_level_1,1,2,3,4,5+,1,2,3,4,5+,1,2,3,4,5+
0,1,4001.0,938.0,125.0,,,9.304987,19.947517,44.758289,,,1007.43294,506.318133,151.396313,,
1,2,1586.0,4060.0,1154.0,440.0,368.0,12.210662,8.67285,18.289846,28.190744,40.489523,524.051794,952.839324,571.146689,335.653372,403.201631
2,3,1244.0,472.0,2115.0,549.0,322.0,13.831269,18.834599,15.661189,26.371454,25.951542,465.601333,240.563745,896.32788,391.776358,226.126178
3,4,99.0,375.0,1070.0,233.0,680.0,43.743627,20.387704,18.37923,25.869699,18.041351,117.187634,206.886138,532.160805,163.109444,331.978363
4,5+,,274.0,122.0,,233.0,,30.237542,42.951339,,35.721873,,224.196512,141.797426,,225.227775


##### Check against ACS

In [41]:
# Settings for the ACS comparison pull: survey year, API dataset path, and
# the whole B25042 table group.
# NOTE(review): B25042 is presumably "Tenure by Bedrooms"; confirm against
# the ACS table list.
year = '2021'
source = 'acs/acs1'
cols = 'group(B25042)'

In [48]:
# PUMA-level rows, limited to the PUMAs named in the two study-area lists
pum = getters.get_puma(source, year, cols)
pum = pum.loc[pum['GEO_ID'].isin(phx_pumas + north_pumas)]

# NOTE(review): get_phx presumably returns the Phoenix city-level figures;
# confirm in getters. Its rows are stacked under the PUMA rows and the
# combined table is passed through the project's clean_table helper.
phx = getters.get_phx(source, year, cols)
dff = getters.clean_table(pd.concat([pum, phx]))

In [51]:
# Write both tenure cross-tabs and the ACS comparison table to one workbook.
# The output folder is created first so ExcelWriter does not fail when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs('output', exist_ok=True)
with pd.ExcelWriter('output/pums_northpx_crowd.xlsx') as writer:
    px_o_piv.to_excel(writer, sheet_name="px_own")
    px_r_piv.to_excel(writer, sheet_name="px_rent")
    dff.to_excel(writer, sheet_name="acs_qaqc", index=False)

## Vacancy

# future analysis - though likely not statistically significant
## Urban Villages, average # of people per bedroom

In [None]:
# Weighted household totals per PUMA, household-size category and bedroom
# category. TEN is dropped along with the other raw variables: it is a
# categorical tenure code, so summing it inside the groupby would produce a
# meaningless column (the owner/renter tables above drop it as well).
uv = (
    oc.drop(columns=['HINCP', 'VACS', 'BDSP', 'NP', 'TEN'])
    .groupby(['GEO_ID', 'pp', 'beds'])
    .sum()
    .reset_index()
)