# Compute some basic stats

In [81]:
import pandas as pd
import geopandas as gpd
import numpy as np
import os
import datetime
import altair as alt

## Viz - Current Network

Let's work out how to build a deck.gl visualization of our current network.

In [4]:
def load_gtfs_into_df(data_dir, base_dir="./data/gtfs_feeds"):
    """
    Load one GTFS feed into a single merged dataframe.

    Reads ``stops.txt``, ``stop_times.txt``, ``routes.txt`` and
    ``trips.txt`` from ``<base_dir>/<data_dir>/`` and joins them into one
    frame.

    Parameters
    ----------
    data_dir : str
        Name of the feed folder inside ``base_dir``.
    base_dir : str, optional
        Folder that contains the individual feed folders. Defaults to
        ``./data/gtfs_feeds`` (relative to the working directory).

    Returns
    -------
    pandas.DataFrame
        routes joined to trips joined to stop_times, with the stop
        attributes attached to every stop_time row.

    Raises
    ------
    FileNotFoundError
        If one of the four tables is missing from the feed folder.
    pandas.errors.MergeError
        If ``stop_id`` is not unique in ``stops.txt`` (the ``m:1`` check).
    """
    feed_dir = os.path.join(base_dir, data_dir)
    stops = pd.read_csv(os.path.join(feed_dir, 'stops.txt'))
    stop_times = pd.read_csv(os.path.join(feed_dir, 'stop_times.txt'))
    routes = pd.read_csv(os.path.join(feed_dir, 'routes.txt'))
    trips = pd.read_csv(os.path.join(feed_dir, 'trips.txt'))
    # calendar.txt is deliberately not read: nothing in the merge uses it.

    # Attach stop attributes to each stop_time; keep stop_times rows even
    # when the stop is unknown, and require stop_id to be unique in stops.
    stop_times_with_stops = pd.merge(
        stop_times,
        stops,
        on='stop_id',
        how='left',
        validate='m:1',
    )
    # No `on=` below: pandas inner-joins on every column name the two frames
    # share (presumably trip_id, then route_id -- verify against the feeds).
    trips_with_stops = pd.merge(trips, stop_times_with_stops)
    merged = pd.merge(routes, trips_with_stops)
    return merged


In [5]:
# Load every feed folder under ./data/gtfs_feeds/, keyed by folder name.
# os.listdir also returns stray files and hidden entries (.DS_Store,
# .ipynb_checkpoints, ...), which are not feeds and would make the loader
# fail, so only non-hidden directories are kept. Sorting makes the load
# order deterministic.
data_dict = {
    day: load_gtfs_into_df(day)
    for day in sorted(os.listdir('./data/gtfs_feeds/'))
    if not day.startswith('.')
    and os.path.isdir(os.path.join('./data/gtfs_feeds/', day))
}
    

In [25]:
def calculate_number_of_trips(df):
    """
    Count distinct trips per service-day type in a merged GTFS dataframe.

    A row belongs to a day type when its ``service_id`` ends with
    ``'Weekday'``, ``'Saturday'`` or ``'Sunday'``. For each type the number
    of unique ``trip_id`` values among the matching rows is returned.

    Returns a dict with the keys ``'weekday'``, ``'saturday'`` and
    ``'sunday'``.
    """
    suffix_by_day = {
        'weekday': 'Weekday',
        'saturday': 'Saturday',
        'sunday': 'Sunday',
    }
    trip_counts = {}
    for day_type, suffix in suffix_by_day.items():
        is_day_type = df.service_id.str.endswith(suffix)
        trip_counts[day_type] = df[is_day_type].trip_id.nunique()
    return trip_counts

In [53]:
# Trip counts per feed, keyed by the first 20 characters of the feed's folder
# name with any trailing '-' removed (presumably the feed's timestamp prefix;
# the keys are parsed as dates further down). Folder names that share the
# same truncated key collapse into a single entry.
num_trips = {
    feed_name[:20].rstrip('-'): calculate_number_of_trips(feed_df)
    for feed_name, feed_df in data_dict.items()
}

In [78]:
# One row per feed, one column per service-day type.
df = pd.DataFrame.from_dict(num_trips).T
# The row labels are the truncated folder names; parse them as datetimes.
df.index = pd.to_datetime(df.index)
# Move the dates out of the index into a regular, named column.
df = df.reset_index()
df = df.rename(columns={'index': 'service_change_date'})

In [94]:
# Treat 0 trip counts as missing, then fill each gap from the next column to
# the right in the same row -- e.g. a 0 Saturday count takes the Sunday value.
# .bfill(axis=1) is the direct equivalent of fillna(method='backfill', axis=1);
# the `method` argument of fillna is deprecated in recent pandas releases.
df = df.sort_values(by='service_change_date').replace(0, np.nan).bfill(axis=1)

In [109]:

# Switch the active Altair theme to 'fivethirtyeight'.
alt.themes.enable('fivethirtyeight')

# Reshape to long form (one row per service-change date and service type),
# then draw one line per service type over time.
alt.Chart(
    df.melt(
        id_vars=['service_change_date'],
        value_vars=['weekday', 'saturday', 'sunday'],
    ).rename(columns={'variable': 'service_type', 'value': 'number_of_trips'})
).mark_line().encode(
    x='service_change_date:T',
    y='number_of_trips',
    color='service_type',
).properties(
    title='Number of Trips per Service Day Type, by change of service date'
)

In [98]:
# Long-form view of the trip counts: one row per (date, service type) pair.
pd.melt(
    df,
    id_vars=['service_change_date'],
    value_vars=['weekday', 'saturday', 'sunday'],
)

Unnamed: 0,service_change_date,variable,value
0,2019-12-17 11:39:35,weekday,13841
1,2019-12-17 15:07:04,weekday,13841
2,2019-12-20 12:40:03,weekday,13841
3,2019-12-28 00:27:34,weekday,13841
4,2020-01-08 18:11:06,weekday,13841
5,2020-01-16 12:48:31,weekday,13841
6,2020-04-15 11:09:43,weekday,10024
7,2020-04-15 16:52:32,weekday,10024
8,2020-04-19 02:05:59,weekday,10024
9,2020-04-25 06:32:38,weekday,10033
