In [1]:
import requests
import time
import pandas as pd

In [2]:
import os
# Choose the data directory. The presence of the COLAB_GPU environment
# variable is used as the signal that the notebook is running on Google
# Colab, in which case the shared drive is mounted and used; otherwise the
# local, relative `data` directory is used.
if 'COLAB_GPU' not in os.environ:
    data_path = 'data'
else:
    from google.colab import drive
    drive.mount('/drive')
    data_path = '/drive/Shared drives/Capstone/notebooks/data'


### Metric: Location Affordability Index : Version 3.0 (March 2019) – uses 2012-2016 American Community Survey

The Low Transportation Cost Index is based on estimates of transportation expenses for a family that meets the following description: 
-  hh6 = a 3-person single-parent family with income at 50% of the median income for renters for the region (i.e. CBSA). 

- **hh6_t_renters** (transportation costs as a percent of income for renters)
- **hh6_transit_trips_renters** (the number of public transit trips taken annually)

The estimates come from the Location Affordability Index (LAI). The data correspond to those for household type 6 (hh_type6_) as noted in the LAI data dictionary. More specifically, among this household type, we model transportation costs as a percent of income for renters (t_rent). Neighborhoods are defined as census tracts. The LAI data do not contain transportation cost information for Puerto Rico.

## Interpretation

**Metric: Transit trips index**

This metric reflects the number of public transit trips taken annually by a three-person single-parent
family with income at 50 percent of the AMI for renters. This number is percentile ranked nationally
into an index with values ranging from 0 to 100. Higher scores reflect better access to public
transportation.

**Metric: Low transportation cost index**

This index reflects local transportation costs as a share of renters’ incomes. It accounts for both transit
and cars. This index is based on estimates of transportation costs for a three-person, single-parent
family with income at 50 percent of the median income for renters for the region (i.e., a core-based
statistical area). Although other arrangements of family composition, income, and housing status are
possible in constructing this index, these characteristics were intended to more closely characterize a
lower-income household in the community. Values are inverted and percentile ranked nationally, with
values ranging from 0 to 100. The higher the value, the lower the cost of transportation in that
neighborhood.

 
Location Affordability Index : 
Version 3.0 (March 2019) – uses 2012-2016 American Community Survey data


### Dataset Description: 
The database generated by the foregoing methodology contains records for 72,241 U.S. Census tracts in the 50 states and the District of Columbia. The first set of fields (columns A-AL) contain data used to calibrate the model and then serve as inputs into the model to estimate household housing and transportation costs for the eight household profiles—including estimates for both owner and renter households—listed in Table 11 for each occupied Census tract. 

### API Documentation: 
https://developers.arcgis.com/rest/services-reference/enterprise/query-feature-service-layer-.htm

https://hudgis-hud.opendata.arcgis.com/datasets/location-affordability-index-v-3/api

There are about 440 variables, so we need to decide which ones to examine.
Page 29 of the methodology document explains the variables:
 https://files.hudexchange.info/resources/documents/Location-Affordability-Index-Version-3-Data-and-Methodology.pdf 

Variables of interest: `hh6_t_renters` and `hh6_transit_trips_renters`.


In [3]:

def get_transportation_df(recreate=False):
  """Load the tract-level LAI v3 transportation fields for household type 6.

  Args:
    recreate: If True, download the data again from the Location
      Affordability Index v3 ArcGIS feature service (one query per state
      code 01-72, paginated) and overwrite the cached CSV. If False
      (default), read the cached CSV from
      `{data_path}/interim/transportation_index.csv`.

  Returns:
    pandas.DataFrame with one row per returned feature and the columns
    GEOID, STATE, COUNTY, TRACT, CNTY_FIPS, STUSAB, hh6_t_renters and
    hh6_transit_trips_renters.

  Raises:
    requests.HTTPError: if a request returns an HTTP error status.
    RuntimeError: if the service answers with an ArcGIS error payload.
  """
  cache_path = f'{data_path}/interim/transportation_index.csv'
  if not recreate:
    return pd.read_csv(cache_path, index_col=0)

  query_url = "https://services.arcgis.com/VTyQ9soqVukalItT/arcgis/rest/services/Location_Affordability_Index_v3/FeatureServer/0/query"
  out_fields = "GEOID,STATE,COUNTY,TRACT,CNTY_FIPS,STUSAB,hh6_t_renters,hh6_transit_trips_renters"

  final_features = []
  for i in range(1, 73):
    state_code = str(i).zfill(2)
    offset = 0
    # The service caps the number of records per response and flags a
    # truncated answer with `exceededTransferLimit`; keep paging with
    # `resultOffset` until a state has been read completely.
    while True:
      params = {
        "where": f"STATE={state_code}",
        "outFields": out_fields,
        "outSR": 4326,
        "f": "json",
        # Only the attributes are used, so skip the tract polygons.
        "returnGeometry": "false",
        # A fixed ordering keeps the pages stable between requests.
        "orderByFields": "GEOID",
        "resultOffset": offset,
      }
      state_response = requests.get(query_url, params=params, timeout=60)
      state_response.raise_for_status()
      json_data = state_response.json()
      # ArcGIS reports query failures as a JSON body with an `error` key,
      # usually with HTTP status 200, so check for it explicitly.
      if 'error' in json_data:
        raise RuntimeError(
          f"ArcGIS query failed for STATE={state_code}: {json_data['error']}"
        )
      features = [row['attributes'] for row in json_data.get('features', [])]
      final_features.extend(features)
      time.sleep(3)  # pause between requests to go easy on the service
      if not features or not json_data.get('exceededTransferLimit'):
        break
      offset += len(features)

  trans_df = pd.DataFrame(final_features)
  trans_df.to_csv(cache_path)
  return trans_df

# Read the cached tract-level table (no re-download).
trans_df = get_transportation_df(recreate=False)

#map tract to city/place

In [4]:
# Reset `trans_df` to the cached tract-level table. Going through the loader
# keeps the cache path and read options in a single place.
trans_df = get_transportation_df(recreate=False)

In [5]:
# Summary statistics of transportation cost as a percent of renter income.
trans_df.loc[:, 'hh6_t_renters'].describe()

count    37221.000000
mean        36.182307
std         10.532436
min          4.488886
25%         29.011102
50%         35.480153
75%         42.671902
max        102.934237
Name: hh6_t_renters, dtype: float64

In [6]:

# Drop Puerto Rico: the LAI has no transportation cost data for PR, so those
# rows are all NaN. Rows with a missing STUSAB are dropped as well, as before.
# The match is exact (not a regex substring search), and `.copy()` returns an
# independent frame so the column assignments in the following cells do not
# raise SettingWithCopyWarning.
trans_df = trans_df[trans_df['STUSAB'].notna() & trans_df['STUSAB'].ne('PR')].copy()

In [7]:
# Percentile-rank annual transit trips across all tracts, scaled to 0-100.
trans_df['transit_trips_index'] = trans_df['hh6_transit_trips_renters'].rank(pct=True).mul(100)

In [8]:
# Invert the cost share (100 / value) so that cheaper transportation yields a
# larger number before ranking.
trans_df['hh6_t_renters_inverse'] = trans_df['hh6_t_renters'].rdiv(100)

In [9]:
# Percentile-rank the inverted cost share across all tracts, scaled to 0-100.
trans_df['transit_low_cost_index'] = trans_df['hh6_t_renters_inverse'].rank(pct=True).mul(100)

In [10]:
# Aggregate to county level: the unweighted mean of the tract-level indices
# per county, with CNTY_FIPS exposed as FIPS.
national_trans_df = (
    trans_df[['CNTY_FIPS', 'transit_trips_index', 'transit_low_cost_index']]
    .rename(columns={'CNTY_FIPS': 'FIPS'})
    .groupby('FIPS', as_index=False)[['transit_trips_index', 'transit_low_cost_index']]
    .mean()
)

In [11]:
# Preview the first five county rows.
national_trans_df.head(n=5)

Unnamed: 0,FIPS,transit_trips_index,transit_low_cost_index
0,1001.0,30.26047,16.325287
1,1003.0,32.650574,31.787807
2,1005.0,14.027472,2.672571
3,1007.0,34.410112,25.42538
4,1009.0,26.128972,24.257059


In [12]:
# Write the county-level indices to the processed data folder, without the
# DataFrame index column.
national_trans_df.to_csv(
    path_or_buf=f'{data_path}/processed/national_transportation_index.csv',
    index=False,
)