# Combination of housing and transportation costs (CITY LEVEL)

### Preparation: import packages

In [1]:
# Silence warnings so they do not clutter the notebook output.
# NOTE: filterwarnings("ignore") without a category hides *every* warning
# raised after this point, not only deprecation warnings.
import warnings
warnings.filterwarnings("ignore") 

In [2]:
from configparser import ConfigParser
import matplotlib.pyplot as plt
import pandas as pd
from census import Census
from us import states
import pyproj
import geopandas as gpd
import numpy as np
import math
import platform
import datetime
import sys
import os
import subprocess
#import seaborn as sb


from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.path import Path
import matplotlib.patheffects as PathEffects
from matplotlib import patheffects
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import palettable.matplotlib as mplpal
import palettable.colorbrewer.sequential as mplpals
from legendgram import legendgram
import mapclassify # used for reproducing the group of data in natural breaks
%matplotlib inline


### Preparation: update stored files or links

In [3]:
# Read the project configuration (parsed as an INI-style file by ConfigParser)
# that stores the path/link of every input used below.
config = ConfigParser()
# read_file() replaces readfp(), which was deprecated in Python 3.2 and removed
# in Python 3.12; the context manager also closes the file handle afterwards.
with open(r'Config.py') as config_file:
    config.read_file(config_file)

# [General] section: shapefile locations
tx_state = config.get('General', "tx_state")    # Texas state shapefile
tx_county = config.get('General', "tx_county")  # Texas county shapefile
tx_bgs = config.get('General', "tx_bgs")        # Texas census block group shapefile
tx_city = config.get('General', "tx_city")      # Texas city shapefile

# [HTcosts] section: cost tables produced by the previous notebooks
housing = config.get('HTcosts', "housing")  # housing expense
transit = config.get('HTcosts', "transit")  # transit expense
auto = config.get('HTcosts', "auto")        # auto expense


In [4]:
# Load the tables written by the previous notebooks (housing, transit,
# auto use). Each path variable read from the config is rebound to its table.
housing = pd.read_csv(housing)
autoexp = pd.read_csv(auto)
# transit: only the join key and the household transit cost are needed
transit_cost = pd.read_csv(transit).loc[:, ['geoid', 'transitcost_hh']]


In [5]:
# Read the Texas state, county, census-block-group and city shapefiles.
# Each name held a path from the config and is rebound to its GeoDataFrame.
tx_state, tx_county, tx_bgs, tx_city = [
    gpd.read_file(shapefile_path)
    for shapefile_path in (tx_state, tx_county, tx_bgs, tx_city)
]


## A. Produce the final comprehensive dataframe for H+T costs

In [6]:
# Build one table: auto expenses, left-joined with transit and housing costs
H_Tcost = (
    autoexp
    .merge(transit_cost, how='left', on='geoid')
    .merge(housing, how='left', on='geoid')
)

H_Tcost['geoid'] = H_Tcost['geoid'].astype(str)

##### derived variables of the final comprehensive dataframe #####
# transportation cost = driving + transit + car ownership
H_Tcost['Transportation_Cost'] = (
    H_Tcost['driving_cost_year']
    + H_Tcost['transitcost_hh']
    + H_Tcost['carownership_expense_hh']
)
# combined H+T = transportation cost + yearly housing cost
H_Tcost['combined H+T'] = H_Tcost['Transportation_Cost'] + H_Tcost['ave_housing_yearly']

# NOTE(review): fillna(0) runs after the totals are computed, so a row with any
# missing component ends up with totals of 0 rather than the sum of the known
# parts -- confirm this is intended.
H_Tcost = H_Tcost.fillna(0)

In [7]:
# Summary statistics of the merged table (quick sanity check of the value ranges)
H_Tcost.describe()

Unnamed: 0,all_vmt_hh,hh,carownership_expense_hh,driving_cost_year,annual_expense_individual,transitcost_hh,medincome,totpop,rent_cost,housing_mortgage,ave_housing_yearly,ave_housing_monthly,HHsize,Transportation_Cost,combined H+T
count,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0,994.0
mean,13594.864029,786.450704,5303.016934,2439.624576,40222.20687,31.123032,81035.470825,2174.015091,1168.016097,1817.15996,20181.659278,1681.80494,2.715312,7773.764542,27955.42382
std,9256.619809,509.805731,2017.677819,1639.482197,18092.397894,72.072499,40760.426979,1496.573497,623.279203,918.115747,7787.539055,648.961588,0.671209,3046.847239,9030.934413
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6810.292662,458.25,4044.449226,1251.572949,27487.146804,0.0,55402.25,1225.5,963.0,1393.0,15091.144107,1257.595342,2.26,5506.423183,22146.480026
50%,11117.814375,666.0,5382.341022,2002.567596,35499.278676,0.0,73483.0,1795.5,1231.0,1734.5,18559.90532,1546.658777,2.68,7584.098077,26893.052341
75%,17954.995949,965.5,6815.743342,3183.666016,49972.115127,36.370993,100085.5,2636.75,1559.0,2295.75,23638.474286,1969.872857,3.1375,9847.855441,32575.802002
max,68011.985156,4237.0,10629.505779,11412.834384,94633.174727,1014.939765,250001.0,11726.0,3501.0,4001.0,47564.941176,3963.745098,5.43,18406.027905,57880.457994


## B. Calculate total expenditures at city level

#### Load TIGER/Line shapefiles

In [8]:
### County shapefile ###
# Reproject the Texas county layer to NAD83 (EPSG:4269)
tx_county = tx_county.to_crs(epsg = 4269)

# keep only the necessary variables
tx_county = tx_county[['GEOID','NAME','geometry']]

# rename columns to lower case
tx_county.columns= tx_county.columns.str.lower()

# narrow down to the CAMPO counties (matched on the county GEOID)
county = ['48021', '48053', '48055','48209','48453','48491']
# reset_index() returns a new, independent frame, so the column assignment
# below is not a chained assignment on a slice of tx_county (the original
# triggered a SettingWithCopyWarning that the global warnings filter hid).
campo_county = tx_county[tx_county['geoid'].isin(county)].reset_index(drop=True)

# duplicate the county name under 'namelsad' so the names can be plotted
campo_county['namelsad'] = campo_county['name']

### Block-group shapefile ###
# Reproject the Texas census block groups to NAD83 (EPSG:4269) and keep only
# the fields that are needed
tx_bgs = tx_bgs.to_crs(epsg=4269)[['GEOID', 'ALAND', 'geometry']]

# lower-case column names
tx_bgs.columns = [col.lower() for col in tx_bgs.columns]

# the join key must be a string on both sides
tx_bgs['geoid'] = tx_bgs['geoid'].astype(str)
H_Tcost['geoid'] = H_Tcost['geoid'].astype(str)

# Attach the H+T costs to the block-group geometries, drop the land-area
# field, rename the two housing columns so their names carry the monthly
# unit, and make sure the result is in NAD83 (EPSG:4269).
H_Tcost = (
    tx_bgs
    .merge(H_Tcost, on="geoid")
    .drop(columns=['aland'])
    .rename(columns={
        'housing_mortgage': 'housing_mortgage_monthly',
        'rent_cost': 'housing_rent_monthly',
    })
    .to_crs(epsg=4269)
)

In [9]:
### City shapefile ###
# Reproject the Texas city layer to NAD83 (EPSG:4269) and keep only the
# fields that are needed
tx_city = tx_city.to_crs(epsg=4269)[['STATEFP', 'GEOID', 'NAME', 'NAMELSAD', 'CLASSFP', 'geometry']]

# lower-case column names
tx_city.columns = [col.lower() for col in tx_city.columns]

# keep only the places whose state FIPS code is 48 (Texas)
state = ['48']
tx_city = tx_city.loc[tx_city['statefp'].isin(state)]

# The two layers must share one coordinate reference system before joining
tx_city.crs == H_Tcost.crs

True

#### Spatially join the city and block-group shapefiles to convert expenditures from the block-group level to the city level

In [10]:
#### spatial join (intersect): overlapping ratio

### The expenditure at the block group level is allocated to the corresponding city area in proportion to the overlapping area

def get_sqmi(row):
    """Return the area of ``row.geometry`` divided by 27,878,400.

    That divisor is the number of square feet in a square mile, so the result
    is in square miles provided the geometry's area is expressed in square
    feet (i.e. the layer uses a CRS whose linear unit is the foot).
    """
    square_feet_per_square_mile = 27878400  # 5280 ft * 5280 ft
    return row.geometry.area / square_feet_per_square_mile

# Areas have to be measured in a projected CRS whose unit is the foot, because
# get_sqmi() divides square feet by 27,878,400. EPSG:4269 is geographic
# (degrees), so EPSG:2277 (NAD83 / Texas Central, US survey feet) is used.
area_epsg = 2277

# total area of every block group (square miles)
bgs_planar = H_Tcost.to_crs(epsg=area_epsg)
bgs_planar['area_bgs'] = bgs_planar.apply(get_sqmi, axis=1)
H_Tcost['area_bgs'] = bgs_planar['area_bgs']

# Intersect the block groups with the cities. A plain sjoin keeps the *whole*
# block-group geometry on every matched row, which made area_overlap equal to
# area_bgs and every weight exactly 1. overlay() returns only the overlapping
# piece, so its area really is the overlap. The key columns are renamed up
# front so the result keeps the geoid_left / geoid_right column names.
city_planar = tx_city.to_crs(epsg=area_epsg)
city_bgs = gpd.overlay(
    bgs_planar.rename(columns={'geoid': 'geoid_left'}),
    city_planar.rename(columns={'geoid': 'geoid_right'}),
    how='intersection',
)
city_bgs['area_overlap'] = city_bgs.apply(get_sqmi, axis=1)

# share of each block group's area that lies inside the city
city_bgs['area_weights'] = city_bgs['area_overlap'] / city_bgs['area_bgs']

# allocate the block-group values to the city in proportion to that share
city_bgs['annual_expense_city'] = city_bgs['annual_expense_individual'] * city_bgs['area_weights']
city_bgs['transportation_cost_city'] = city_bgs['Transportation_Cost'] * city_bgs['area_weights']
city_bgs['combined H+T'] = city_bgs['combined H+T'] * city_bgs['area_weights']
city_bgs['medincome'] = city_bgs['medincome'] * city_bgs['area_weights']

# drop incomplete rows, then return to NAD83 (EPSG:4269) like the other layers
city_bgs = city_bgs.dropna()
city_bgs = city_bgs.to_crs(epsg=4269)

In [11]:
# Household weights for the city-level weighted averages of each type of
# expenditure. (np.average normalises the weights it receives, so dividing by
# the overall total only rescales them.)
# The original also bound city_cost to a bare groupby object here; that value
# was overwritten before any use, so the dead assignment is removed.
city_bgs['hh_weights'] = city_bgs['hh'] / city_bgs['hh'].sum()

def weighted_average(data):
    """Return the household-weighted mean of the city-level columns of one group.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows of a single group. Must contain the columns
        'annual_expense_city', 'transportation_cost_city', 'combined H+T',
        'medincome' and the weight column 'hh_weights'.

    Returns
    -------
    pandas.Series
        One weighted average per value column, indexed by column name.
        Every entry is NaN when the weights of the group sum to zero (for
        example a group with no households), where np.average would otherwise
        raise ZeroDivisionError and abort the whole groupby.
    """
    value_columns = [
        'annual_expense_city',
        'transportation_cost_city',
        'combined H+T',
        'medincome',
    ]
    weights = data['hh_weights']
    if weights.sum() == 0:
        return pd.Series({column: np.nan for column in value_columns})
    return pd.Series(
        {column: np.average(data[column], weights=weights) for column in value_columns}
    )

# One row per city (geoid_right) holding the household-weighted averages;
# the city id is moved from the index back into a regular column.
city_cost = (
    city_bgs
    .groupby('geoid_right')
    .apply(weighted_average)
    .reset_index()
)


#### Recalculate the combined H+T costs and other needed variables at city level

In [12]:
# H+T costs as a share (%) of median household income
city_cost['combined H+T share'] = (city_cost['combined H+T'] / city_cost['medincome']) * 100

# a zero income gives an infinite share: store it as 0, then cap shares at 100 %
city_cost = city_cost.replace([np.inf, -np.inf], 0)
city_cost.loc[city_cost['combined H+T share'] > 100, ['combined H+T share']] = 100


# Build the affordability index.
# The usual H+T benchmark treats combined costs of no more than 45 % of income
# as affordable; the original labels were inverted (>= 45 was "Affordable").
# Cities without a usable share (no income, or a NaN share) keep
# "Unapplicable", so a share forced to 0 above is not reported as affordable.
has_share = (city_cost['medincome'] > 0) & city_cost['combined H+T share'].notna()
within_benchmark = city_cost['combined H+T share'] <= 45

city_cost['Affordability'] = "Unapplicable"
city_cost.loc[has_share & within_benchmark, "Affordability"] = "Affordable"
city_cost.loc[has_share & ~within_benchmark, "Affordability"] = "Un-affordable"


# Transportation costs as a share (%) of total household expenditures, capped at 100
city_cost['transportation share'] = city_cost['transportation_cost_city'] / city_cost['annual_expense_city'] * 100
city_cost.loc[city_cost['transportation share'] > 100, ['transportation share']] = 100


# attach the city attributes and geometry (every aggregated city row is kept)
city_cost = tx_city.merge(city_cost, how='right', left_on='geoid', right_on='geoid_right')


In [13]:
# Tabular copy for export: drop the geometry and the city attributes that are
# not needed in the output
city_cost1 = city_cost.drop(
    ['statefp', 'name', 'geometry', 'classfp', 'geoid_right'],
    axis='columns',
)

In [14]:
# Write the city-level table to a CSV file, without the row index
city_cost1.to_csv(path_or_buf='city_share.csv', index=False)