# ACS Cost Burden for Renters and Homeowners

#### This notebook analyzes housing cost burden in the NYC Metro region: renter and owner households spending 30% or more of household income (HHI) on rent or mortgage. TODO: expand this description.

- https://www.census.gov/data/developers/data-sets/acs-5year.html

For data profile variables (the housing cost burden variables used in this notebook)
- https://api.census.gov/data/2010/acs/acs5/profile/variables.html
- https://api.census.gov/data/2020/acs/acs5/profile/variables.html

For detailed table variables
- https://api.census.gov/data/2010/acs/acs5/variables.html
- https://api.census.gov/data/2020/acs/acs5/variables.html

### Set proxy prior to running

In [23]:
# Toggle before running: True when on the DCP server (proxy settings are imported),
# False when working off-network (no proxy is used).
onDCPServer = True

In [24]:
import pandas as pd
import math
import numpy as np
import os

In [25]:
from geo import *
import get_acs as get
import geo_agg

In [26]:
## Proxy assignment: use the settings from the local `proxy` module when on the
## DCP server, otherwise None (working off-network).
if onDCPServer:
    # Imported lazily so the `proxy` module is only required on-network
    from proxy import p
else:
    p = None

In [27]:
# Run-date stamp, presumably YYMMDD -- TODO confirm; not referenced by the cells shown here
date = "220510"

In [28]:
# Search parameters.
# NOTE: the DP04 variable numbers are assigned differently in 2010 vs 2020.
y0, y1 = '2010', '2020'

# Comma-separated field lists requested for each year (estimate E / margin M pairs)
cols_y1 = ','.join([
    'GEO_ID',
    'DP04_0136E', 'DP04_0136M',
    'DP04_0141E', 'DP04_0141M',
    'DP04_0142E', 'DP04_0142M',
    'DP04_0110E', 'DP04_0110M',
    'DP04_0114E', 'DP04_0114M',
    'DP04_0115E', 'DP04_0115M',
])
cols_y0 = ','.join([
    'GEO_ID',
    'DP04_0134E', 'DP04_0134M',
    'DP04_0139E', 'DP04_0139M',
    'DP04_0140E', 'DP04_0140M',
    'DP04_0108E', 'DP04_0108M',
    'DP04_0112E', 'DP04_0112M',
    'DP04_0113E', 'DP04_0113M',
])

# Dataset path handed to the fetch helper (ACS 5-year data profile)
source = 'acs/acs5/profile'

In [29]:
# Variable lists per year, plus the groupings that are summed downstream.
# Suffix E = estimate, M = margin of error.

# --- 2020 (y1) ---
var_data_y1 = [
    'GEO_ID',
    'DP04_0136E', 'DP04_0136M',  # used downstream as the renter total
    'DP04_0141E', 'DP04_0141M',
    'DP04_0142E', 'DP04_0142M',
    'DP04_0110E', 'DP04_0110M',  # used downstream as the owner total
    'DP04_0114E', 'DP04_0114M',
    'DP04_0115E', 'DP04_0115M',
]

# Gross rent over 30% of household income
rb_y1_E = ['DP04_0141E', 'DP04_0142E']
rb_y1_M = ['DP04_0141M', 'DP04_0142M']
# Mortgage payment over 30% of household income
ob_y1_E = ['DP04_0114E', 'DP04_0115E']
ob_y1_M = ['DP04_0114M', 'DP04_0115M']

# --- 2010 (y0) ---
var_data_y0 = [
    'GEO_ID',
    'DP04_0134E', 'DP04_0134M',  # used downstream as the renter total
    'DP04_0139E', 'DP04_0139M',
    'DP04_0140E', 'DP04_0140M',
    'DP04_0108E', 'DP04_0108M',  # used downstream as the owner total
    'DP04_0112E', 'DP04_0112M',
    'DP04_0113E', 'DP04_0113M',
]

# Gross rent over 30% of household income
rb_y0_E = ['DP04_0139E', 'DP04_0140E']
rb_y0_M = ['DP04_0139M', 'DP04_0140M']
# Mortgage payment over 30% of household income
ob_y0_E = ['DP04_0112E', 'DP04_0113E']
ob_y0_M = ['DP04_0112M', 'DP04_0113M']

## NYC Metro counties, subregion, region cost burden in Y1 and Y0, with change

In [30]:
# Fetch the y1 county records and pass them through get.clean_data
# together with the y1 variable list.
dfY1 = get.clean_data(
    get.get_county(p, source, y1, cols_y1),
    var_data_y1,
)

In [31]:
# Derive the y1 burdened / total counts for renters and owners, then drop the
# raw ACS columns so only GEO_ID and the derived fields remain.
dfY1 = (
    dfY1
    .assign(
        Rent_Burd_Y1=dfY1[rb_y1_E].sum(axis=1),
        Rent_Tot_Y1=dfY1['DP04_0136E'],
        Own_Burd_Y1=dfY1[ob_y1_E].sum(axis=1),
        Own_Tot_Y1=dfY1['DP04_0110E'],
    )
    .drop(columns=var_data_y1[1:])  # every requested variable except GEO_ID
)

In [37]:
# Preview the first five y1 rows
dfY1.head(5)

Unnamed: 0,GEO_ID,Rent_Burd_Y1,Rent_Tot_Y1,Own_Burd_Y1,Own_Tot_Y1
400,36027,17495.0,32985.0,14979.0,47339.0
406,36061,247346.0,550771.0,23163.0,90176.0
409,36079,2881.0,5701.0,8098.0,20048.0
410,36085,25744.0,47467.0,30490.0,75908.0
612,34003,54778.0,114546.0,50008.0,141705.0


In [33]:
# Fetch the y0 county records and pass them through get.clean_data
# together with the y0 variable list.
dfY0 = get.clean_data(
    get.get_county(p, source, y0, cols_y0),
    var_data_y0,
)

In [34]:
# Derive the y0 burdened / total counts for renters and owners, then drop the
# raw ACS columns so only GEO_ID and the derived fields remain.
dfY0 = (
    dfY0
    .assign(
        Rent_Burd_Y0=dfY0[rb_y0_E].sum(axis=1),
        Rent_Tot_Y0=dfY0['DP04_0134E'],
        Own_Burd_Y0=dfY0[ob_y0_E].sum(axis=1),
        Own_Tot_Y0=dfY0['DP04_0108E'],
    )
    .drop(columns=var_data_y0[1:])  # every requested variable except GEO_ID
)

In [35]:
# Preview the first five y0 rows
dfY0.head(5)

Unnamed: 0,GEO_ID,Rent_Burd_Y0,Rent_Tot_Y0,Own_Burd_Y0,Own_Tot_Y0
438,9001,49148.0,92171.0,77685.0,171350.0
440,9005,7082.0,14646.0,17579.0,42894.0
442,9009,60022.0,107535.0,65006.0,155173.0
1900,34003,49738.0,102534.0,74701.0,152821.0
1905,34013,72973.0,140545.0,49716.0,95135.0


In [38]:
#Merge the two dataframes using the stco identifier
dfY0Y1 = pd.merge(dfY0,dfY1,how='left',on='GEO_ID')

In [39]:
# Preview the first five merged rows
dfY0Y1.head(5)

Unnamed: 0,GEO_ID,Rent_Burd_Y0,Rent_Tot_Y0,Own_Burd_Y0,Own_Tot_Y0,Rent_Burd_Y1,Rent_Tot_Y1,Own_Burd_Y1,Own_Tot_Y1
0,9001,49148.0,92171.0,77685.0,171350.0,59050.0,109228.0,56090.0,158270.0
1,9005,7082.0,14646.0,17579.0,42894.0,8027.0,16242.0,11804.0,37612.0
2,9009,60022.0,107535.0,65006.0,155173.0,61681.0,118782.0,43395.0,137490.0
3,34003,49738.0,102534.0,74701.0,152821.0,54778.0,114546.0,50008.0,141705.0
4,34013,72973.0,140545.0,49716.0,95135.0,83585.0,154816.0,36342.0,89499.0


In [41]:
# Collapse every county into a single regional ("Metro") row of summed counts.
# GEO_ID is an identifier, not a quantity, so it is removed explicitly before
# summing: pandas >= 2.0 no longer drops non-numeric columns silently in
# groupby().sum(), and a summed/concatenated GEO_ID would be meaningless.
# Note: sum() skips NaN, so a county missing from the y1 side of the merge
# contributes nothing to the y1 totals.
reg = (
    dfY0Y1
    .drop(columns='GEO_ID')
    .assign(reg='Metro')
    .groupby('reg')
    .sum()
    .reset_index()
)
reg

Unnamed: 0,reg,Rent_Burd_Y0,Rent_Tot_Y0,Own_Burd_Y0,Own_Tot_Y0,Rent_Burd_Y1,Rent_Tot_Y1,Own_Burd_Y1,Own_Tot_Y1
0,Metro,1763570.0,3399606.0,1461211.0,3063416.0,1890504.0,3646869.0,1055281.0,2828351.0


In [42]:
# Write the regional totals to Excel. The year span in the file name is built
# from y0/y1 so it stays in step with the search parameters.
reg.to_excel(f'../../output/Housing/CostBurden_region_{y0}{y1}.xlsx', index=False)

In [43]:
# Write the county-level table to Excel. The year span in the file name is built
# from y0/y1 so it stays in step with the search parameters.
dfY0Y1.to_excel(f'../../output/Housing/CostBurden_counties_{y0}{y1}.xlsx', index=False)