# ACS Cost Burden for Renters and Homeowners in Phoenix Urban Villages, City of Phoenix, U.S.

- https://www.census.gov/data/developers/data-sets/acs-5year.html

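Variable definitions for gross rent as a percentage of household income (table B25070) and for selected monthly owner costs as a percentage of household income, with and without a mortgage (table B25091):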
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

**Note:** Census block group data (block groups are aggregated to Phoenix Urban Village areas) are not available before 2013.

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from acs_cost_vars import *

In [3]:
# Load the two geography tables that carry a block-group `geoid` and the
# urban-village `name` it belongs to (presumably the 2010- and 2020-vintage
# block-group definitions, judging by the file and column suffixes).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Left-pad every geoid with zeros to a 12-character string so it can be
# matched against the Census GEO_ID later (pandas presumably parses the
# column as integers, which would strip leading zeros - TODO confirm).
for df in (bgp_10, bgp_20):
    df['geoid'] = df['geoid'].map('{0:0>12}'.format)

# Get rid of the area and coordinate columns that are not used below.
bgp_20 = bgp_20.drop(columns=['aland20', 'awater20', 'lat20', 'lon20', 'land_acre'])
bgp_10 = bgp_10.drop(columns=['aland10', 'awater10', 'lat10', 'lon10', 'land_acre'])

In [4]:
# Search parameters - NOTE: different variable # assignments in 2010 vs 2020
y1 = '2021'  # later year; changes are computed as y1 minus y0
y0 = '2013'  # earlier (baseline) year

# ACS table groups requested from the API.
cols_rent = 'group(B25070)'  # gross rent as a percentage of household income
cols_own = 'group(B25091)'   # owner costs as a percentage of household income

source = 'acs/acs5'

# Put GEO_ID in front of the variable lists imported from acs_cost_vars.
# Any GEO_ID already present is filtered out first, so re-running this cell
# does not keep prepending duplicates (own_vars[1:] is later used as
# "everything except GEO_ID").
rent_vars = ['GEO_ID'] + [v for v in rent_vars if v != 'GEO_ID']
own_vars = ['GEO_ID'] + [v for v in own_vars if v != 'GEO_ID']

##  RENT BURDEN 

In [5]:
def group_rent(df,year):
    """Collapse the B25070 rent-burden columns into summary bands for one year.

    Adds combined estimate (`_E`) and margin-of-error (`_M`) columns for the
    `r_u30` and `r_3050` bands, renames the total / 50%-or-more / not-computed
    columns to `r_tot`, `r_o50` and `r_na`, drops the detailed source columns,
    and finally inserts the two-digit year before the trailing letter of every
    column except the first (e.g. `r_tot_E` -> `r_tot_21E` for year '2021').

    Args:
        df: DataFrame whose first column is the geography id and which holds
            the columns listed in `r_u30_e`, `r_u30_m`, `r_3050_e`, `r_3050_m`
            (imported from acs_cost_vars) plus B25070_001/010/011 E and M.
        year: four-character year string; only the last two characters are used.

    Returns:
        A new DataFrame; the frame passed in is left untouched.
    """
    # Work on a copy so the new columns are not added to the caller's frame.
    df = df.copy()

    # Estimates are summed; MOEs are combined row by row by calc.get_moe
    # (presumably a root-sum-of-squares - TODO confirm in utilcalcs).
    df['r_u30_E'] = df.loc[:, r_u30_e].sum(axis=1)
    df['r_u30_M'] = df.apply(lambda row: calc.get_moe(row[r_u30_m]), axis=1)
    df['r_3050_E'] = df.loc[:, r_3050_e].sum(axis=1)
    df['r_3050_M'] = df.apply(lambda row: calc.get_moe(row[r_3050_m]), axis=1)

    df = df.rename(columns={
        'B25070_001E': 'r_tot_E', 'B25070_001M': 'r_tot_M',
        'B25070_010E': 'r_o50_E', 'B25070_010M': 'r_o50_M',
        'B25070_011E': 'r_na_E', 'B25070_011M': 'r_na_M',
    })
    df = df.drop(columns=r_u30_e + r_u30_m + r_3050_e + r_3050_m)

    # Tag every column but the first with the two-digit year in one rename.
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

In [6]:
def rent_uvil(geo_df,year):
    """Build the renter cost-burden table by urban village for one year.

    Fetches the rent table via `get.get_bgp`, cleans and groups it, attaches
    it to `geo_df` (left join of `geoid` to `GEO_ID`), drops both id columns
    and hands the result to `geo_agg.sumgeo_cv` keyed on `name`.

    Args:
        geo_df: DataFrame with a `geoid` column and a `name` column.
        year: four-character year string passed to the fetch and group steps.

    Returns:
        Whatever `geo_agg.sumgeo_cv` returns (presumably one row per `name`
        with CV columns added - TODO confirm in geo_agg).
    """
    raw = get.get_bgp(source, year, cols_rent)
    grouped = group_rent(get.clean_data(raw, rent_vars), year)
    joined = geo_df.merge(grouped, how='left', left_on='geoid', right_on='GEO_ID')
    joined = joined.drop(columns=['geoid', 'GEO_ID'])
    return geo_agg.sumgeo_cv(joined, 'name')

In [7]:
def rent_change(df,change_vars,year1,year0):
    """Add change-over-time and share columns for each renter burden band.

    For every band in `change_vars` this writes, in place:
      * `r_<band>_<y0><y1>E` - year1 estimate minus year0 estimate
      * `r_<band>_<y0><y1>M` - `calc.get_moe` of the two yearly MOEs
      * `r_<band>_<y0><y1>C` - `calc.get_cv` of that change and its MOE
      * `r_<band>_<yy>P`     - band estimate divided by `r_tot_<yy>E`, per year
    where <y0>/<y1>/<yy> are the last two characters of the year strings.

    Args:
        df: DataFrame holding the `r_<band>_<yy>E/M` and `r_tot_<yy>E` columns.
        change_vars: iterable of band names such as 'u30' or 'o50'.
        year1: later year as a string.
        year0: earlier year as a string.

    Returns:
        The same DataFrame object, with the new columns added.
    """
    yy0, yy1 = year0[-2:], year1[-2:]
    span = f'{yy0}{yy1}'

    for var in change_vars:
        est, moe, cv = (f'r_{var}_{span}{kind}' for kind in 'EMC')

        df[est] = df[f'r_{var}_{yy1}E'] - df[f'r_{var}_{yy0}E']
        df[moe] = df.apply(
            lambda row: calc.get_moe([row[f'r_{var}_{yy0}M'], row[f'r_{var}_{yy1}M']]),
            axis=1,
        )
        df[cv] = df.apply(lambda row: calc.get_cv(row[est], row[moe]), axis=1)

        # Share of the total for each of the two years.
        for yy in (yy0, yy1):
            df[f'r_{var}_{yy}P'] = df[f'r_{var}_{yy}E'] / df[f'r_tot_{yy}E']

    return df

####  Urban Villages

In [8]:
# Renter burden by urban village: build one table per year, join them on the
# village name, then add the change and share columns.
dfY0 = rent_uvil(bgp_10, y0)
dfY1 = rent_uvil(bgp_20, y1)
df_uv = rent_change(
    dfY0.merge(dfY1, on='name', how='left'),
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [9]:
# Preview the first five rows of the urban-village renter table.
df_uv.head()

Unnamed: 0,name,r_u30_13E,r_u30_13M,r_u30_13C,r_3050_13E,r_3050_13M,r_3050_13C,r_o50_13E,r_o50_13M,r_o50_13C,...,r_o50_1321E,r_o50_1321M,r_o50_1321C,r_o50_13P,r_o50_21P,r_na_1321E,r_na_1321M,r_na_1321C,r_na_13P,r_na_21P
0,Laveen,1449.0,304.205523,12.762413,1033.0,293.600068,17.277859,1132.0,281.465806,15.11518,...,-74.0,380.267011,312.385617,0.294332,0.247023,-123.0,168.949697,83.499986,0.060322,0.025449
1,Deer Valley,11736.0,921.534047,4.773373,5979.0,623.671388,6.341053,4496.0,497.294681,6.723906,...,766.0,866.01963,68.727898,0.19366,0.198252,125.0,465.665116,226.463278,0.043289,0.042574
2,Maryvale,10756.0,800.092495,4.521927,7943.0,687.452544,5.26129,9413.0,710.069011,4.58571,...,-2331.0,1005.710197,26.22797,0.311122,0.223541,-603.0,535.111203,53.946196,0.070831,0.04861
3,North Mountain,12834.0,917.15593,4.344254,7054.0,696.89454,6.005729,6782.0,635.919806,5.70005,...,835.0,1008.883046,73.449433,0.23861,0.240322,48.0,562.500667,712.386863,0.061675,0.056823
4,Paradise Valley,11917.0,891.158235,4.545922,5307.0,588.318791,6.739035,5205.0,576.446008,6.732432,...,95.0,946.788255,605.847548,0.220401,0.220081,-151.0,447.912938,180.322848,0.050263,0.04302


## City of Phoenix & U.S.

In [10]:
def make_table_r(year,geo):
    """Fetch and group the renter cost-burden table for a single geography.

    Args:
        year: four-character year string passed to the fetch and group steps.
        geo: 'phoenix' (uses `get.get_phx`) or 'us' (uses `get.get_us`).

    Returns:
        The DataFrame produced by `group_rent` for the requested geography.

    Raises:
        ValueError: if `geo` is neither 'phoenix' nor 'us'. Previously this
            fell through and failed later with an UnboundLocalError.
    """
    if geo == 'phoenix':
        df = get.get_phx(source,year,cols_rent)
    elif geo == 'us':
        df = get.get_us(source,year,cols_rent)
    else:
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")
    df = get.clean_data(df,rent_vars)
    return group_rent(df,year)

##### City of Phoenix

In [11]:
# City of Phoenix renter burden: one table per year, joined on GEO_ID.
df0 = make_table_r(y0, "phoenix")
df1 = make_table_r(y1, "phoenix")
df_phx = geo_agg.sumgeo_cv(
    df0.merge(df1, on='GEO_ID', how='left'),
    'GEO_ID',
)  # calc cvs
df_phx = rent_change(
    df_phx,
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [12]:
# Display the City of Phoenix renter table.
df_phx

Unnamed: 0,GEO_ID,r_u30_13E,r_u30_13M,r_u30_13C,r_o50_21E,r_o50_21M,r_o50_21C,r_3050_21E,r_3050_21M,r_3050_21C,...,r_o50_1321E,r_o50_1321M,r_o50_1321C,r_o50_13P,r_o50_21P,r_na_1321E,r_na_1321M,r_na_1321C,r_na_13P,r_na_21P
0,455000,101193.0,2479.652193,1.489616,54756.0,2071.0,2.29923,62095.0,2422.473736,2.371573,...,-1897.0,2715.895801,87.032182,0.246029,0.214883,-4424.0,1304.334696,17.922889,0.070847,0.046661


##### U.S.

In [13]:
# U.S. renter burden: one table per year, joined on GEO_ID.
df_u0 = make_table_r(y0, "us")
df_u1 = make_table_r(y1, "us")
df_us = geo_agg.sumgeo_cv(
    df_u0.merge(df_u1, on='GEO_ID', how='left'),
    'GEO_ID',
)  # calc cvs
df_us = rent_change(
    df_us,
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [14]:
# Display the U.S. renter table.
df_us

Unnamed: 0,GEO_ID,r_u30_13E,r_u30_13M,r_u30_13C,r_o50_21E,r_o50_21M,r_o50_21C,r_3050_21E,r_3050_21M,r_3050_21C,...,r_o50_1321E,r_o50_1321M,r_o50_1321C,r_o50_13P,r_o50_21P,r_na_1321E,r_na_1321M,r_na_1321C,r_na_13P,r_na_21P
0,0100000US,17871158.0,41145.231814,0.139959,10048573.0,45066.0,0.272633,10120829.0,33541.276571,0.201464,...,-77903.0,54730.425652,42.707952,0.249824,0.229112,-34839.0,22654.776472,39.530126,0.076031,0.069473


## OWNER BURDEN
*Note: judging by statistical reliability in the renter cohort, this code assesses owner cost burden for all owners combined, with or without a mortgage. The hypothesis is that the more detailed cut for Urban Village areas (i.e., with a mortgage vs. without a mortgage) would not yield any statistically significant findings.*

#### Urban Villages

In [15]:
def group_own(df,year):
    """Collapse the B25091 owner-cost columns into summary bands for one year.

    For each of the bands `o_tot`, `o_u30`, `o_3050`, `o_o50` and `o_na` this
    adds a combined estimate (`_E`) and margin-of-error (`_M`) column built
    from the matching column lists imported from acs_cost_vars, drops every
    column named in `own_vars[1:]`, and finally inserts the two-digit year
    before the trailing letter of every column except the first
    (e.g. `o_tot_E` -> `o_tot_21E` for year '2021').

    Args:
        df: DataFrame whose first column is the geography id and which holds
            the columns listed in the `o_*_e` / `o_*_m` lists.
        year: four-character year string; only the last two characters are used.

    Returns:
        A new DataFrame; the frame passed in is left untouched.
    """
    # Work on a copy so the new columns are not added to the caller's frame.
    df = df.copy()

    # (output column stem, estimate columns to sum, MOE columns to combine)
    bands = [
        ('o_tot', o_tot_e, o_tot_m),
        ('o_u30', o_u30_e, o_u30_m),
        ('o_3050', o_3050_e, o_3050_m),
        ('o_o50', o_o50_e, o_o50_m),
        ('o_na', o_na_e, o_na_m),
    ]
    for stem, est_cols, moe_cols in bands:
        df[f'{stem}_E'] = df.loc[:, est_cols].sum(axis=1)
        # MOEs are combined row by row by calc.get_moe (presumably a
        # root-sum-of-squares - TODO confirm in utilcalcs).
        df[f'{stem}_M'] = df.apply(
            lambda row, cols=moe_cols: calc.get_moe(row[cols]), axis=1
        )

    # own_vars[0] is GEO_ID; everything after it is a detailed source column.
    df = df.drop(columns=own_vars[1:])

    # Tag every column but the first with the two-digit year in one rename.
    yy = year[-2:]
    return df.rename(
        columns={col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    )

In [16]:
def own_uvil(geo_df,year):
    """Build the owner cost-burden table by urban village for one year.

    Fetches the owner table via `get.get_bgp`, cleans and groups it, attaches
    it to `geo_df` (left join of `geoid` to `GEO_ID`), drops both id columns
    and hands the result to `geo_agg.sumgeo_cv` keyed on `name`.

    Args:
        geo_df: DataFrame with a `geoid` column and a `name` column.
        year: four-character year string passed to the fetch and group steps.

    Returns:
        Whatever `geo_agg.sumgeo_cv` returns (presumably one row per `name`
        with CV columns added - TODO confirm in geo_agg).
    """
    raw = get.get_bgp(source, year, cols_own)
    grouped = group_own(get.clean_data(raw, own_vars), year)
    joined = geo_df.merge(grouped, how='left', left_on='geoid', right_on='GEO_ID')
    joined = joined.drop(columns=['geoid', 'GEO_ID'])
    return geo_agg.sumgeo_cv(joined, 'name')

In [17]:
def own_change(df,change_vars,year1,year0):
    """Add change-over-time and share columns for each owner burden band.

    For every band in `change_vars` this writes, in place:
      * `o_<band>_<y0><y1>E` - year1 estimate minus year0 estimate
      * `o_<band>_<y0><y1>M` - `calc.get_moe` of the two yearly MOEs
      * `o_<band>_<y0><y1>C` - `calc.get_cv` of that change and its MOE
      * `o_<band>_<yy>P`     - band estimate divided by `o_tot_<yy>E`, per year
    where <y0>/<y1>/<yy> are the last two characters of the year strings.

    Args:
        df: DataFrame holding the `o_<band>_<yy>E/M` and `o_tot_<yy>E` columns.
        change_vars: iterable of band names such as 'u30' or 'o50'.
        year1: later year as a string.
        year0: earlier year as a string.

    Returns:
        The same DataFrame object, with the new columns added.
    """
    yy0, yy1 = year0[-2:], year1[-2:]
    span = f'{yy0}{yy1}'

    for var in change_vars:
        est, moe, cv = (f'o_{var}_{span}{kind}' for kind in 'EMC')

        df[est] = df[f'o_{var}_{yy1}E'] - df[f'o_{var}_{yy0}E']
        df[moe] = df.apply(
            lambda row: calc.get_moe([row[f'o_{var}_{yy0}M'], row[f'o_{var}_{yy1}M']]),
            axis=1,
        )
        df[cv] = df.apply(lambda row: calc.get_cv(row[est], row[moe]), axis=1)

        # Share of the total for each of the two years.
        for yy in (yy0, yy1):
            df[f'o_{var}_{yy}P'] = df[f'o_{var}_{yy}E'] / df[f'o_tot_{yy}E']

    return df

In [18]:
# Owner burden by urban village: build one table per year, join them on the
# village name, then add the change and share columns.
dfoY0 = own_uvil(bgp_10, y0)
dfoY1 = own_uvil(bgp_20, y1)

dfo_uv = own_change(
    dfoY0.merge(dfoY1, on='name', how='left'),
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [19]:
# Preview the first five rows of the urban-village owner table.
dfo_uv.head()

Unnamed: 0,name,o_o50_13E,o_o50_13M,o_o50_13C,o_tot_13E,o_tot_13M,o_tot_13C,o_3050_13E,o_3050_13M,o_3050_13C,...,o_o50_1321E,o_o50_1321M,o_o50_1321C,o_o50_13P,o_o50_21P,o_na_1321E,o_na_1321M,o_na_1321C,o_na_13P,o_na_21P
0,Laveen,1097.0,781.416662,43.302218,10609.0,753.536993,4.317817,2677.0,471.261074,10.701565,...,-177.0,1266.375932,434.93412,0.103403,0.064376,204.0,158.880458,47.345032,0.072109,0.067805
1,Deer Valley,4556.0,1493.436976,19.926787,41164.0,1268.238148,1.872912,7539.0,661.762042,5.336078,...,-1097.0,2333.840183,129.329793,0.110679,0.077753,494.0,329.654364,40.566354,0.090224,0.094589
2,Maryvale,5429.0,1055.025118,11.813458,28948.0,1074.641801,2.256728,6696.0,610.56695,5.54309,...,-2238.0,2033.547147,55.236768,0.187543,0.088149,-211.0,344.717275,99.314964,0.135312,0.102376
3,North Mountain,4633.0,1209.944627,15.875861,32822.0,1176.794375,2.179564,5818.0,590.864621,6.17374,...,-1217.0,2062.186703,103.00813,0.141155,0.097134,-50.0,327.844475,398.595107,0.110505,0.101712
4,Paradise Valley,6039.0,1335.466211,13.443179,45824.0,1267.832797,1.681911,8557.0,671.381412,4.769599,...,-1694.0,2124.687271,76.245762,0.131787,0.092991,120.0,341.486457,172.992126,0.091437,0.092242


### City of Phoenix and U.S.

##### City of Phoenix

In [20]:
def make_table_o(year,geo):
    """Fetch and group the owner cost-burden table for a single geography.

    Args:
        year: four-character year string passed to the fetch and group steps.
        geo: 'phoenix' (uses `get.get_phx`) or 'us' (uses `get.get_us`).

    Returns:
        The DataFrame produced by `group_own` for the requested geography.

    Raises:
        ValueError: if `geo` is neither 'phoenix' nor 'us'. Previously this
            fell through and failed later with an UnboundLocalError.
    """
    if geo == 'phoenix':
        df = get.get_phx(source,year,cols_own)
    elif geo == 'us':
        df = get.get_us(source,year,cols_own)
    else:
        raise ValueError(f"geo must be 'phoenix' or 'us', got {geo!r}")
    df = get.clean_data(df,own_vars)
    return group_own(df,year)

In [21]:
# City of Phoenix owner burden: one table per year, joined on GEO_ID.
dfo0 = make_table_o(y0, 'phoenix')
dfo1 = make_table_o(y1, 'phoenix')
df_phx_o = geo_agg.sumgeo_cv(
    dfo0.merge(dfo1, on='GEO_ID', how='left'),
    'GEO_ID',
)  # calc cvs
df_phx_o = own_change(
    df_phx_o,
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [22]:
# Display the City of Phoenix owner table.
df_phx_o

Unnamed: 0,GEO_ID,o_o50_13E,o_o50_13M,o_o50_13C,o_tot_13E,o_tot_13M,o_tot_13C,o_na_21E,o_na_21M,o_na_21C,...,o_o50_1321E,o_o50_1321M,o_o50_1321C,o_o50_13P,o_o50_21P,o_na_1321E,o_na_1321M,o_na_1321C,o_na_13P,o_na_21P
0,455000,39452.0,3585.268749,5.524421,287078.0,3501.115679,0.74138,2398.0,431.040601,10.927054,...,-11229.0,5881.696864,31.841657,0.137426,0.086825,-1427.0,638.717465,27.209397,0.013324,0.007377


##### U.S.

In [23]:
# U.S. owner burden: one table per year, joined on GEO_ID.
dfo0_u = make_table_o(y0, 'us')
dfo1_u = make_table_o(y1, 'us')
df_us_o = geo_agg.sumgeo_cv(
    dfo0_u.merge(dfo1_u, on='GEO_ID', how='left'),
    'GEO_ID',
)  # calc cvs
df_us_o = own_change(
    df_us_o,
    change_vars=['u30', '3050', 'o50', 'na'],
    year1=y1,
    year0=y0,
)

In [24]:
# Display the U.S. owner table.
df_us_o

Unnamed: 0,GEO_ID,o_o50_13E,o_o50_13M,o_o50_13C,o_tot_13E,o_tot_13M,o_tot_13C,o_na_21E,o_na_21M,o_na_21C,...,o_o50_1321E,o_o50_1321M,o_o50_1321C,o_o50_13P,o_o50_21P,o_na_1321E,o_na_1321M,o_na_1321C,o_na_13P,o_na_21P
0,0100000US,8553288.0,144589.760115,1.027634,75075700.0,285317.959482,0.231028,256308.0,6955.123363,1.649593,...,-1425670.0,214654.85175,9.152838,0.113929,0.088926,-14853.0,9000.243941,36.836147,0.003612,0.003198


In [26]:
# Write every result table to its own sheet of a single workbook.
# ExcelWriter does not create missing folders, so make sure the target exists.
os.makedirs('output', exist_ok=True)

# Sheet name -> table, in the order the sheets should appear.
sheets = {
    "urban_vil_rent": df_uv,
    "phoenix_rent": df_phx,
    "us_rent": df_us,
    "urban_vil_own": dfo_uv,
    "phoenix_own": df_phx_o,
    "us_own": df_us_o,
}

with pd.ExcelWriter('output/HH_cost_ten.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)