# ACS Cost Burden for Renters and Homeowners in Phoenix Urban Villages and the City of Phoenix

- https://www.census.gov/data/developers/data-sets/acs-5year.html

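Variable lists for gross rent as a percentage of household income (renters) and for selected monthly owner costs as a percentage of household income (owners with and without a mortgage):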
- https://api.census.gov/data/2013/acs/acs5/variables.html
- https://api.census.gov/data/2021/acs/acs5/variables.html

**Note:** Census Block Groups (which are aggregated to Phoenix Urban Village areas) are not available before 2013.

In [1]:
import pandas as pd
import math
import numpy as np
import os

In [2]:
import get_acs as get
import utilcalcs as calc
import geo_agg
from acs_cost_vars import *

In [3]:
# Block-group crosswalk tables used by the *_uvil functions below
# (presumably 2010- and 2020-vintage block groups -- confirm against the data folder).
bgp_10 = pd.read_csv('../data/geo/bgp_vil_10.csv')
bgp_20 = pd.read_csv('../data/geo/bgp_vil_20.csv')

# Left-pad every geoid with zeros so it becomes a 12-character string.
for df in (bgp_10, bgp_20):
    df['geoid'] = df['geoid'].map('{0:0>12}'.format)

# Get rid of the area and coordinate columns that are not used below.
bgp_20 = bgp_20.drop(columns=['aland20','awater20','lat20','lon20','land_acre'])
bgp_10 = bgp_10.drop(columns=['aland10','awater10','lat10','lon10','land_acre'])

In [4]:
#Search parameters - NOTE DIFFERENT VARIABLE # ASSIGNMENTS IN 2010 VS 2020
y1 = '2021'   # later year of the comparison
y0 = '2013'   # earlier year of the comparison

# Column specs handed to the get_acs fetch helpers: one whole Census table each.
cols_rent = 'group(B25070)'
cols_own = 'group(B25091)'

# Dataset path handed to the get_acs fetch helpers.
source = 'acs/acs5'

# Put GEO_ID at the front of each variable list (the lists come from acs_cost_vars).
# Any GEO_ID already present is filtered out first, so re-running this cell
# does not stack a second GEO_ID in front of the list.
rent_vars = ['GEO_ID'] + [v for v in rent_vars if v != 'GEO_ID']
own_vars = ['GEO_ID'] + [v for v in own_vars if v != 'GEO_ID']

##  RENT BURDEN 

In [5]:
def group_rent(df,year):
    """Collapse the B25070 rent-burden columns into summary bands tagged with the year.

    Adds ``r_u30_E`` / ``r_3050_E`` as row sums over the ``r_u30_e`` / ``r_3050_e``
    column lists, and ``r_u30_M`` / ``r_3050_M`` from ``calc.get_moe`` applied to
    the ``r_u30_m`` / ``r_3050_m`` columns of each row. The table total (001),
    the 50-percent-or-more band (010) and the not-computed band (011) are renamed
    to ``r_tot``, ``r_o50`` and ``r_na``. The detail columns are then dropped and
    the two-digit year is inserted before the final letter of every column except
    the first (``r_tot_E`` -> ``r_tot_21E``).

    Args:
        df: DataFrame holding the B25070 estimate and margin-of-error columns.
            The first column is left un-renamed (expected to be GEO_ID --
            confirm against ``get.clean_data``).
        year: ACS year such as ``'2021'``; a string or an int is accepted and
            only its last two digits are used.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    yy = str(year)[-2:]

    # Work on a copy so the helper columns never leak into the caller's frame.
    df = df.copy()

    df['r_u30_E'] = df.loc[:,r_u30_e].sum(axis=1)
    df['r_u30_M'] = df.apply(lambda row: calc.get_moe(row[r_u30_m]),axis=1)
    df['r_3050_E'] = df.loc[:,r_3050_e].sum(axis=1)
    df['r_3050_M'] = df.apply(lambda row: calc.get_moe(row[r_3050_m]),axis=1)

    df = df.rename(columns={'B25070_001E':'r_tot_E','B25070_001M':'r_tot_M',
                            'B25070_010E':'r_o50_E','B25070_010M':'r_o50_M',
                            'B25070_011E':'r_na_E', 'B25070_011M':'r_na_M'})

    # The detail columns have been folded into the bands above.
    df = df.drop(columns=r_u30_e+r_u30_m+r_3050_e+r_3050_m)

    # One rename for all columns instead of an in-place rename per column.
    year_tagged = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    return df.rename(columns=year_tagged)

In [6]:
def rent_uvil(geo_df,year):
    """Build the rent-burden table aggregated by the ``name`` column for one year.

    Fetches the rent table with ``get.get_bgp``, cleans it against ``rent_vars``,
    bands it with ``group_rent``, left-joins it onto ``geo_df`` (``geoid`` ==
    ``GEO_ID``), drops both key columns and passes the result to
    ``geo_agg.sumgeo_cv`` grouped on ``name``.

    Args:
        geo_df: crosswalk frame with ``geoid`` and ``name`` columns.
        year: ACS year string passed through to the fetch and banding steps.

    Returns:
        Whatever ``geo_agg.sumgeo_cv`` returns for the joined frame.
    """
    fetched = get.get_bgp(source, year, cols_rent)
    banded = group_rent(get.clean_data(fetched, rent_vars), year)

    # Keep every crosswalk row, even when no fetched row matches it.
    joined = pd.merge(geo_df, banded, how='left', left_on='geoid', right_on='GEO_ID')
    keyless = joined.drop(columns=['geoid', 'GEO_ID'])
    return geo_agg.sumgeo_cv(keyless, 'name')

In [7]:
def rent_change(df,change_vars,year1,year0):
    """Add change-over-time and share columns for each rent-burden band.

    For every band in ``change_vars`` this writes, in order:
    ``r_<band>_<y0><y1>E`` (later estimate minus earlier estimate),
    ``r_<band>_<y0><y1>M`` (``calc.get_moe`` of the two yearly MOEs),
    ``r_<band>_<y0><y1>C`` (``calc.get_cv`` of that estimate and MOE), and
    ``r_<band>_<yy>P`` for each year (band estimate divided by ``r_tot_<yy>E``).

    Args:
        df: frame holding both years' ``E`` and ``M`` columns; modified in place.
        change_vars: band labels, e.g. ``['u30', '3050', 'o50', 'na']``.
        year1: later year string; only the last two characters are used.
        year0: earlier year string; only the last two characters are used.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    early, late = year0[-2:], year1[-2:]
    span = f'{early}{late}'

    for band in change_vars:
        stem = f'r_{band}_'
        diff_est, diff_moe, diff_cv = f'{stem}{span}E', f'{stem}{span}M', f'{stem}{span}C'
        moe_early, moe_late = f'{stem}{early}M', f'{stem}{late}M'

        df[diff_est] = df[f'{stem}{late}E'] - df[f'{stem}{early}E']
        df[diff_moe] = df.apply(
            lambda row: calc.get_moe([row[moe_early], row[moe_late]]), axis=1)
        df[diff_cv] = df.apply(
            lambda row: calc.get_cv(row[diff_est], row[diff_moe]), axis=1)

        # Share of the yearly total for each of the two years.
        for yy in (early, late):
            df[f'{stem}{yy}P'] = df[f'{stem}{yy}E'] / df[f'r_tot_{yy}E']

    return df

####  Urban Villages

In [8]:
# Later year first, then earlier year (same call order as before).
dfY1, dfY0 = rent_uvil(bgp_20, y1), rent_uvil(bgp_10, y0)

# Earlier year is the left side of the join, so its rows set the output rows.
df_uv = rent_change(dfY0.merge(dfY1, on='name', how='left'),
                    ['u30', '3050', 'o50', 'na'], y1, y0)

In [9]:
# Preview the first rows of the rent-burden table built in the previous cell.
df_uv.head()

Unnamed: 0,name,r_o50_13E,r_o50_13M,r_o50_13C,r_tot_13E,r_tot_13M,r_tot_13C,r_3050_13E,r_3050_13M,r_3050_13C,...,r_o50_1321E,r_o50_1321M,r_o50_1321C,r_o50_13P,r_o50_21P,r_na_1321E,r_na_1321M,r_na_1321C,r_na_13P,r_na_21P
0,Laveen,1132.0,281.465806,15.11518,3846.0,473.684494,7.487106,1033.0,293.600068,17.277859,...,-74.0,380.267011,312.385617,0.294332,0.247023,-123.0,168.949697,83.499986,0.060322,0.025449
1,Deer Valley,4496.0,497.294681,6.723906,23216.0,1017.806956,2.665091,5979.0,623.671388,6.341053,...,766.0,866.01963,68.727898,0.19366,0.198252,125.0,465.665116,226.463278,0.043289,0.042574
2,Maryvale,9413.0,710.069011,4.58571,30255.0,1109.630119,2.22954,7943.0,687.452544,5.26129,...,-2331.0,1005.710197,26.22797,0.311122,0.223541,-603.0,535.111203,53.946196,0.070831,0.04861
3,North Mountain,6782.0,635.919806,5.70005,28423.0,1098.14571,2.348682,7054.0,696.89454,6.005729,...,835.0,1008.883046,73.449433,0.23861,0.240322,48.0,562.500667,712.386863,0.061675,0.056823
4,Paradise Valley,5205.0,576.446008,6.732432,23616.0,1016.715791,2.617142,5307.0,588.318791,6.739035,...,95.0,946.788255,605.847548,0.220401,0.220081,-151.0,447.912938,180.322848,0.050263,0.04302


#### City of Phoenix

In [None]:
def make_phx(year):
    """Fetch, clean and band the rent-burden table from ``get.get_phx`` for one year.

    Args:
        year: ACS year string passed to ``get.get_phx`` and ``group_rent``.

    Returns:
        The frame produced by ``group_rent``.
    """
    fetched = get.get_phx(source, year, cols_rent)
    cleaned = get.clean_data(fetched, rent_vars)
    return group_rent(cleaned, year)

In [None]:
# Later year is fetched first, then the earlier year.
df1, df0 = make_phx(y1), make_phx(y0)

In [None]:
# Put both years side by side on GEO_ID, with the earlier year on the left.
df_phx = df0.merge(df1, on='GEO_ID', how='left')
# calc cvs, then add the change and share columns
df_phx = rent_change(geo_agg.sumgeo_cv(df_phx, 'GEO_ID'),
                     ['u30', '3050', 'o50', 'na'], y1, y0)

In [None]:
df_phx.head()

# Create the output folder if it is missing, so the write below
# does not fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
df_phx.to_excel('output/phx_rent.xlsx',index=False)

## OWNER BURDEN
*Note:* Judging by statistical reliability in the renter cohort, this code assesses owner cost burden for all owners together, with or without a mortgage. The hypothesis is that the more detailed cut for Urban Village areas (i.e., with a mortgage vs. without a mortgage) will not yield any statistically significant findings.

#### Urban Villages

In [None]:
def group_own(df,year):
    """Collapse the B25091 owner-cost columns into summary bands tagged with the year.

    For each band (``tot``, ``u30``, ``3050``, ``o50``, ``na``) this adds
    ``o_<band>_E`` as the row sum of the matching ``o_<band>_e`` column list and
    ``o_<band>_M`` from ``calc.get_moe`` applied to the matching ``o_<band>_m``
    columns of each row. Every raw variable named in ``own_vars`` except
    ``GEO_ID`` is then dropped, and the two-digit year is inserted before the
    final letter of every column except the first (``o_tot_E`` -> ``o_tot_21E``).

    Args:
        df: DataFrame holding the B25091 estimate and margin-of-error columns.
            The first column is left un-renamed (expected to be GEO_ID --
            confirm against ``get.clean_data``).
        year: ACS year such as ``'2021'``; a string or an int is accepted and
            only its last two digits are used.

    Returns:
        A new DataFrame; the frame passed in is left unmodified.
    """
    yy = str(year)[-2:]

    # (band label, estimate columns, MOE columns), in output column order.
    bands = (
        ('tot', o_tot_e, o_tot_m),
        ('u30', o_u30_e, o_u30_m),
        ('3050', o_3050_e, o_3050_m),
        ('o50', o_o50_e, o_o50_m),
        ('na', o_na_e, o_na_m),
    )

    # Work on a copy so the helper columns never leak into the caller's frame.
    df = df.copy()

    for band, est_cols, moe_cols in bands:
        df[f'o_{band}_E'] = df.loc[:, est_cols].sum(axis=1)
        # Bind moe_cols as a default so each lambda keeps its own column list.
        df[f'o_{band}_M'] = df.apply(
            lambda row, cols=moe_cols: calc.get_moe(row[cols]), axis=1)

    # Drop the raw variables by name rather than by position in own_vars, so the
    # GEO_ID join key survives even if own_vars holds GEO_ID more than once.
    raw_cols = [col for col in own_vars if col != 'GEO_ID']
    df = df.drop(columns=raw_cols)

    # One rename for all columns instead of an in-place rename per column.
    year_tagged = {col: f'{col[:-1]}{yy}{col[-1:]}' for col in df.columns[1:]}
    return df.rename(columns=year_tagged)

In [None]:
def own_uvil(geo_df,year):
    """Build the owner-cost table aggregated by the ``name`` column for one year.

    Fetches the owner table with ``get.get_bgp``, cleans it against ``own_vars``,
    bands it with ``group_own``, left-joins it onto ``geo_df`` (``geoid`` ==
    ``GEO_ID``), drops both key columns and passes the result to
    ``geo_agg.sumgeo_cv`` grouped on ``name``.

    Args:
        geo_df: crosswalk frame with ``geoid`` and ``name`` columns.
        year: ACS year string passed through to the fetch and banding steps.

    Returns:
        Whatever ``geo_agg.sumgeo_cv`` returns for the joined frame.
    """
    fetched = get.get_bgp(source, year, cols_own)
    banded = group_own(get.clean_data(fetched, own_vars), year)

    # Keep every crosswalk row, even when no fetched row matches it.
    joined = pd.merge(geo_df, banded, how='left', left_on='geoid', right_on='GEO_ID')
    keyless = joined.drop(columns=['geoid', 'GEO_ID'])
    return geo_agg.sumgeo_cv(keyless, 'name')

In [None]:
def own_change(df,change_vars,year1,year0):
    """Add change-over-time and share columns for each owner-cost band.

    For every band in ``change_vars`` this writes, in order:
    ``o_<band>_<y0><y1>E`` (later estimate minus earlier estimate),
    ``o_<band>_<y0><y1>M`` (``calc.get_moe`` of the two yearly MOEs),
    ``o_<band>_<y0><y1>C`` (``calc.get_cv`` of that estimate and MOE), and
    ``o_<band>_<yy>P`` for each year (band estimate divided by ``o_tot_<yy>E``).

    Args:
        df: frame holding both years' ``E`` and ``M`` columns; modified in place.
        change_vars: band labels, e.g. ``['u30', '3050', 'o50', 'na']``.
        year1: later year string; only the last two characters are used.
        year0: earlier year string; only the last two characters are used.

    Returns:
        The same ``df`` object, with the new columns added.
    """
    early, late = year0[-2:], year1[-2:]
    span = f'{early}{late}'

    for band in change_vars:
        stem = f'o_{band}_'
        diff_est, diff_moe, diff_cv = f'{stem}{span}E', f'{stem}{span}M', f'{stem}{span}C'
        moe_early, moe_late = f'{stem}{early}M', f'{stem}{late}M'

        df[diff_est] = df[f'{stem}{late}E'] - df[f'{stem}{early}E']
        df[diff_moe] = df.apply(
            lambda row: calc.get_moe([row[moe_early], row[moe_late]]), axis=1)
        df[diff_cv] = df.apply(
            lambda row: calc.get_cv(row[diff_est], row[diff_moe]), axis=1)

        # Share of the yearly total for each of the two years.
        for yy in (early, late):
            df[f'{stem}{yy}P'] = df[f'{stem}{yy}E'] / df[f'o_tot_{yy}E']

    return df

In [None]:
# Later year first, then earlier year (same call order as before).
dfoY1, dfoY0 = own_uvil(bgp_20, y1), own_uvil(bgp_10, y0)

# Earlier year is the left side of the join, so its rows set the output rows.
dfo_uv = own_change(dfoY0.merge(dfoY1, on='name', how='left'),
                    ['u30', '3050', 'o50', 'na'], y1, y0)

In [None]:
# Preview the first rows of the owner-cost table built in the previous cell.
dfo_uv.head()

In [None]:
# Create the output folder if it is missing, so the write below
# does not fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
dfo_uv.to_excel('output/own_uv.xlsx',index=False)

#### City of Phoenix

In [None]:
def make_phx_o(year):
    """Fetch, clean and band the owner-cost table from ``get.get_phx`` for one year.

    Args:
        year: ACS year string passed to ``get.get_phx`` and ``group_own``.

    Returns:
        The frame produced by ``group_own``.
    """
    fetched = get.get_phx(source, year, cols_own)
    cleaned = get.clean_data(fetched, own_vars)
    return group_own(cleaned, year)

In [None]:
# Later year is fetched first, then the earlier year.
dfo1, dfo0 = make_phx_o(y1), make_phx_o(y0)

In [None]:
# Put both years side by side on GEO_ID, with the earlier year on the left.
df_phx_o = dfo0.merge(dfo1, on='GEO_ID', how='left')
# calc cvs, then add the change and share columns
df_phx_o = own_change(geo_agg.sumgeo_cv(df_phx_o, 'GEO_ID'),
                      ['u30', '3050', 'o50', 'na'], y1, y0)

In [None]:
# Display the owner-cost table built in the previous cell.
df_phx_o

In [None]:
# Create the output folder if it is missing, so the write below
# does not fail on a fresh checkout.
os.makedirs('output', exist_ok=True)
df_phx_o.to_excel('output/df_phx_o.xlsx',index=False)