In [6]:
from gtfs_functions import Feed
import pandas as pd
import geopandas as gpd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio


In [31]:
# Load the MTA Maryland local-bus GTFS feed and derive trips per hour (tph)
# for every stop and every line within each time window.
gtfs_path = "https://feeds.mta.maryland.gov/gtfs/local-bus"
# time_windows are hour-of-day boundaries; the resulting `window` labels look
# like "0:00-6:00", "6:00-9:00", ... (see the displayed output of this cell).
# No start date is passed, so the library logs that it falls back to the
# busiest date of the feed.
feed = Feed(gtfs_path, time_windows=[0, 6, 9, 12, 15, 19, 24])
routes = feed.routes


def add_trips_per_hour(freq):
    """Return a copy of ``freq`` with window bounds and trips per hour added.

    Parameters
    ----------
    freq : pandas.DataFrame (or GeoDataFrame)
        Frequency table with a string ``window`` column formatted like
        ``"6:00-9:00"`` and a numeric ``ntrips`` column.

    Returns
    -------
    Same type as ``freq``
        A new frame with three extra columns:
        ``window_start`` / ``window_end`` (integer hours parsed from
        ``window``) and ``tph`` (``ntrips`` divided by the window length in
        hours). The input frame is not modified, so re-running the cell is
        idempotent.

    Raises
    ------
    ValueError
        If a ``window`` value does not match the expected pattern (the
        extracted NaN cannot be cast to int).
    """
    # expand=False returns a Series (not a one-column DataFrame) for a
    # single capture group, which is what we want for a new column.
    window_start = freq["window"].str.extract(r"(\d+):", expand=False).astype(int)
    window_end = freq["window"].str.extract(r"-(\d+):", expand=False).astype(int)
    return freq.assign(
        window_start=window_start,
        window_end=window_end,
        tph=freq["ntrips"] / (window_end - window_start),
    )


stop_freq = add_trips_per_hour(feed.stops_freq)
line_freq = add_trips_per_hour(feed.lines_freq)
line_freq.head(2)

INFO:root:Reading "routes.txt".
INFO:root:Reading "stop_times.txt".
INFO:root:get trips in stop_times
INFO:root:accessing trips
INFO:root:Start date is None. You should either specify a start date or set busiest_date to True.
INFO:root:Reading "trips.txt".
INFO:root:Reading "calendar.txt".
INFO:root:Reading "calendar_dates.txt".
INFO:root:The busiest date/s of this feed or your selected date range is/are:  ['2024-05-27'] with 9185 trips.
INFO:root:In the case that more than one busiest date was found, the first one will be considered.
INFO:root:In this case is 2024-05-27.
INFO:root:Reading "stop_times.txt".
INFO:root:_trips is defined in stop_times
INFO:root:Reading "stops.txt".
INFO:root:computing patterns
INFO:root:Reading "shapes.txt".


Unnamed: 0,route_id,route_name,direction_id,window,min_per_trip,ntrips,geometry,window_start,window_end,tph
0,11638,21 Woodberry - Canton Crossing,0,6:00-9:00,180,1,"LINESTRING (-76.56925 39.27539, -76.56928 39.2...",6,9,0.333333
1,11638,21 Woodberry - Canton Crossing,0,0:00-6:00,40,9,"LINESTRING (-76.56925 39.27539, -76.56928 39.2...",0,6,1.5


In [32]:
# Jittered strip plot of trips per hour (tph) for every route in each time
# window, with the per-window median across routes overlaid as a red line.
line_freq = line_freq.sort_values('window_start')

# Chronological list of window labels. The x-axis is categorical, and without
# an explicit order plotly orders categories by first appearance across the
# traces, which depends on which windows the first route happens to serve.
window_order = line_freq['window'].drop_duplicates().tolist()

layout = {
    'title': 'Categorical X-Axis, with Jitter for Trips per hour and window for each line',
    'xaxis': {
        'title': {'text': 'Time window'},
        'categoryorder': 'array',
        'categoryarray': window_order,
    },
    'yaxis': {'title': {'text': 'Trips per hour'}},
}

# Styling shared by every per-route trace: a box trace whose box (line and
# fill) is fully transparent, so only the jittered points are visible.
point_only_box = {
    'type': 'box',
    'boxpoints': 'all',
    'fillcolor': 'rgba(255,255,255,0)',
    'hoveron': 'points',
    'line': {'color': 'rgba(255,255,255,0)'},
    'boxmean': 'sd',  # Show the standard deviation in the box
    'pointpos': 0,
    'showlegend': False,
}

# One trace per route. groupby(sort=False) keeps first-appearance order and
# avoids re-filtering the whole frame once per route.
traces = []
for route_name, route_rows in line_freq.groupby('route_name', sort=False):
    traces.append({
        **point_only_box,
        'x': route_rows['window'],
        'y': route_rows['tph'],
        'name': route_name,
        'marker': {'color': 'black', 'size': 4},
        # The trace name is not a per-point hovertemplate variable, so the
        # route name is written into the template text directly.
        'hovertemplate': (
            'Time window=%{x}<br>Trips per hour=%{y}<br>Route='
            + str(route_name)
            + '<extra></extra>'
        ),
    })

# Median tph per window, ordered chronologically by 'window_start'.
medians = (
    line_freq.groupby(['window', 'window_start'])['tph']
    .median()
    .reset_index()
    .sort_values('window_start')
)

# Line trace connecting the per-window medians.
median_trace = {
    'type': 'scatter',
    'x': medians['window'],
    'y': medians['tph'],
    'mode': 'lines',
    'name': 'Median',
    'line': {'color': 'red'},
}

# Add the median trace on top of the route traces and render the figure.
traces.append(median_trace)
pio.show({'data': traces, 'layout': layout})