In [1]:
from pathlib import Path
import pandas as pd
import calendar

In [7]:
# Output directory for the generated JSON files, located under the user's home directory.
DATA_PATH = Path.home().joinpath('chn-ghost-buses', 'data_output', 'scratch')

In [3]:
# Ridership CSV export served by the City of Chicago data portal; the later cells
# read its 'route', 'date', 'daytype' and 'rides' columns.
RIDERSHIP_CSV_URL = 'https://data.cityofchicago.org/api/views/jyb9-n7fm/rows.csv?accessType=DOWNLOAD'
ridership = pd.read_csv(RIDERSHIP_CSV_URL)

In [4]:
def get_latest_month_and_year(ridership: pd.DataFrame) -> tuple:
    """Return the month and year of the most recent date in the ridership data.

    The ``date`` column is parsed into a temporary Series, so the caller's
    DataFrame is left untouched (this is a read-only query).

    Args:
        ridership (pd.DataFrame): Ridership records with a ``date`` column
            holding ``MM/DD/YYYY`` strings or already-parsed datetimes.

    Returns:
        tuple: ``(month, year)`` of the latest date, both as ints.

    Raises:
        ValueError: If the ``date`` column contains no valid dates.
    """
    parsed_dates = pd.to_datetime(ridership['date'], format="%m/%d/%Y")
    latest_date = parsed_dates.max()
    # An empty (or all-missing) column yields NaT, whose month/year are NaN;
    # fail here with a clear message instead of propagating NaN downstream.
    if pd.isna(latest_date):
        raise ValueError("ridership contains no valid dates; cannot determine the latest month and year")
    return int(latest_date.month), int(latest_date.year)


In [5]:
def ridership_to_json(ridership: pd.DataFrame, month: int = None, year: int = None,
                      holidays: tuple = ('2022-07-04',)) -> None:
    """Save ridership data to JSON for given month and year.

    Note that the data is typically a few months behind the current date.
    Whichever of ``month`` / ``year`` is None is filled in from the latest
    date available in the data (each one independently).

    Two files are written under ``DATA_PATH`` (created if missing):

    * ``daily_{Month}_{year}_cta_ridership_data.json`` -- one record per
      row of the selected month with ``route_id``, ``date``, ``day_type``
      and ``rides``.
    * ``{Month}_{year}_cta_ridership_data_day_type_summary.json`` -- mean
      and total ``rides`` per ``route_id`` and ``day_type``.

    The input DataFrame is not modified.

    Args:
        ridership (pd.DataFrame): Ridership records with ``route``,
            ``date`` (``MM/DD/YYYY``), ``daytype`` and ``rides`` columns.
        month (int): Month of interest. Defaults to None
        year (int): Year of interest. Defaults to None
        holidays (tuple): Dates (anything ``pd.to_datetime`` accepts) whose
            ``day_type`` is overridden to ``'hol'``. Defaults to
            ``('2022-07-04',)``.
    """
    # rename() returns a new frame, so the date parsing below never touches
    # the caller's DataFrame and the cell can be re-run safely.
    frame = ridership.rename(columns={'route': 'route_id'})
    frame['date'] = pd.to_datetime(frame['date'], format="%m/%d/%Y")

    # Fill month and year independently: the previous if/elif left ``year``
    # as None whenever ``month`` was also None, selecting no rows at all.
    if month is None or year is None:
        latest_month, latest_year = get_latest_month_and_year(frame)
        if month is None:
            month = latest_month
        if year is None:
            year = latest_year
    month_name = calendar.month_name[month]

    in_period = (frame['date'].dt.month == month) & (frame['date'].dt.year == year)
    daily = frame.loc[in_period].copy()
    daily['day_type'] = daily['daytype'].map({'W': 'weekday', 'A': 'sat', 'U': 'sun'})
    if holidays:
        is_holiday = daily['date'].isin(pd.to_datetime(list(holidays)))
        daily.loc[is_holiday, 'day_type'] = 'hol'

    # to_json does not create missing parent directories.
    DATA_PATH.mkdir(parents=True, exist_ok=True)

    daily[['route_id', 'date', 'day_type', 'rides']].to_json(
        DATA_PATH / f'daily_{month_name}_{year}_cta_ridership_data.json',
        orient='records',
    )

    # Named aggregation yields the flat output columns directly.
    daytype_summary = (
        daily.groupby(by=['route_id', 'day_type'])
        .agg(avg_riders=('rides', 'mean'), total_riders=('rides', 'sum'))
        .reset_index()
    )
    daytype_summary.to_json(
        DATA_PATH / f'{month_name}_{year}_cta_ridership_data_day_type_summary.json',
        orient='records',
    )

In [8]:
# Run the export with month and year left at their None defaults, so the
# function picks the period from the data rather than from the caller.
ridership_to_json(ridership, month=None, year=None)