In [1]:
import requests
import pandas as pd
import numpy as np
from io import StringIO, BytesIO
import json
import geopandas as gpd


# --- Remote data sources ---------------------------------------------------
# Every download is followed by raise_for_status() so that an HTTP error
# (4xx/5xx) stops the notebook right here, instead of surfacing further down
# as a confusing CSV / JSON / Excel parsing error on an error page.

# RTA routes: CSV export (spatial reference requested via outSR: wkid 102100 / latestWkid 3857).
RTA_ROUTE_URL = 'https://opendata.arcgis.com/datasets/1cb5c63d6f114f8a94c6d5a0e03ae62e_0.csv?outSR=%7B%22latestWkid%22%3A3857%2C%22wkid%22%3A102100%7D'
RTA_ROUTE_DATA = requests.get(RTA_ROUTE_URL)
RTA_ROUTE_DATA.raise_for_status()

# RTA stops: feature-service query returning JSON (f=json), all fields, outSR=4326.
RTA_STOPS_URL = 'https://gis.massdot.state.ma.us/arcgis/rest/services/Multimodal/RTAs/FeatureServer/1/query?where=1%3D1&outFields=*&outSR=4326&f=json'
RTA_STOPS_DATA = requests.get(RTA_STOPS_URL)
RTA_STOPS_DATA.raise_for_status()

# Ridership: Excel workbook ("August 2020 Adjusted Database") hosted on transit.dot.gov.
RTA_RIDERSHIP_URL = 'https://www.transit.dot.gov/sites/fta.dot.gov/files/2020-10/August%202020%20Adjusted%20Database.xlsx'
RTA_RIDERSHIP_DATA = requests.get(RTA_RIDERSHIP_URL)
RTA_RIDERSHIP_DATA.raise_for_status()

# Rishab helper code
def add_census_tract(dataframe, tract_shapefile="data/tl_2019_25_tract/tl_2019_25_tract.shp"):
    '''
    Returns a copy of `dataframe` with a "census_tract" column added by a spatial join.

    Each row of `dataframe` is matched to the census-tract polygon it lies
    within. The join is a left join, so every input row is kept; rows that fall
    inside no tract get a missing "census_tract" value.

    Parameters:
        dataframe: GeoDataFrame to annotate. The tract polygons are reprojected
            to EPSG:26986 before joining, so the input is presumably expected
            to be in that CRS as well -- TODO confirm with callers.
        tract_shapefile: path of the tract shapefile to read. It must provide
            "TRACTCE" and "geometry" columns. Defaults to the path this
            notebook has always used.

    Returns:
        A new GeoDataFrame; `dataframe` itself is not modified.
    '''
    import inspect

    tracts = (
        gpd.read_file(tract_shapefile)
        .rename(columns={"TRACTCE": "census_tract"})
        .to_crs("EPSG:26986")
    )[['census_tract', 'geometry']]

    # geopandas 0.10 renamed sjoin's `op` keyword to `predicate` and deprecated
    # the old spelling. Use whichever keyword the installed version accepts so
    # the helper works on both old and new releases.
    sjoin_params = inspect.signature(gpd.sjoin).parameters
    relation_kw = 'predicate' if 'predicate' in sjoin_params else 'op'

    joined = gpd.sjoin(dataframe, tracts, how='left', **{relation_kw: 'within'})

    # 'index_right' is bookkeeping added by sjoin (index of the matched tract).
    return joined.drop(columns=['index_right'])


def get_median_hh_income():
    '''
    Returns a Pandas DataFrame of the Median Household Income Estimate by Census Tract for MA.
    American Community Survey (ACS) 2018 Census data used.
    Specific table: ACS 2018 5-year detailed table "B19013_001E"

    The column names are taken from the first row of the API response and
    every remaining row becomes one DataFrame row. Values are kept exactly as
    the API returns them; no type conversion is done here.

    Raises:
        requests.HTTPError: if the Census API answers with an error status.
    '''
    URL = "https://api.census.gov/data/2018/acs/acs5?get=B19013_001E&for=tract:*&in=state:25"

    response = requests.get(url = URL)
    # Fail loudly on 4xx/5xx instead of trying to JSON-decode an error page.
    response.raise_for_status()
    data = response.json()

    # Row 0 is the header and everything after it is data. The previous slice
    # (data[1:len(data)-1]) stopped one row short and silently dropped the
    # last tract.
    header, rows = data[0], data[1:]
    median_income_df = pd.DataFrame(rows, columns = header)

    return median_income_df

In [2]:
# Parse the three downloaded payloads:
#   routes    -> CSV text
#   stops     -> the 'features' list of the JSON response
#   ridership -> the 'MASTER' sheet of the Excel workbook
route_df = pd.read_csv(
    StringIO(RTA_ROUTE_DATA.content.decode())
)
stops_data = json.loads(RTA_STOPS_DATA.content)['features']
ridership_df = pd.read_excel(
    BytesIO(RTA_RIDERSHIP_DATA.content),
    sheet_name='MASTER',
)

# Distinct 'Agency' values that occur across all stop records.
agency_set = {feature['attributes']['Agency'] for feature in stops_data}

# Brockton-only subset of each dataset. The routes and stops data use the
# 'BrocktonAreaRTA' label, while the ridership sheet is filtered on the
# longer 'Brockton Area Transit Authority' name.
brockton = dict(
    routes=route_df[route_df['Agency'].eq('BrocktonAreaRTA')],
    ridership=ridership_df[ridership_df['Agency'].eq('Brockton Area Transit Authority')],
    stops=[
        feature
        for feature in stops_data
        if feature['attributes']['Agency'] == 'BrocktonAreaRTA'
    ],
)