In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State

ModuleNotFoundError: No module named 'jupyter_dash'

## Data import and formatting

In [31]:
# Read the ridership CSV, turn the Date column into timestamps, and use it
# as the index of the working frame.
df = (
    pd.read_csv("../data/MTA_Daily_Ridership.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
    .set_index("Date")
)
df.head()

Unnamed: 0_level_0,Subways: Total Estimated Ridership,Subways: % of Comparable Pre-Pandemic Day,Buses: Total Estimated Ridership,Buses: % of Comparable Pre-Pandemic Day,LIRR: Total Estimated Ridership,LIRR: % of Comparable Pre-Pandemic Day,Metro-North: Total Estimated Ridership,Metro-North: % of Comparable Pre-Pandemic Day,Access-A-Ride: Total Scheduled Trips,Access-A-Ride: % of Comparable Pre-Pandemic Day,Bridges and Tunnels: Total Traffic,Bridges and Tunnels: % of Comparable Pre-Pandemic Day,Staten Island Railway: Total Estimated Ridership,Staten Island Railway: % of Comparable Pre-Pandemic Day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-03-01,2212965,97,984908,99,86790,100,55825,59,19922,113,786960,98,1636,52
2020-03-02,5329915,96,2209066,99,321569,103,180701,66,30338,102,874619,95,17140,107
2020-03-03,5481103,98,2228608,99,319727,102,190648,69,32767,110,882175,96,17453,109
2020-03-04,5498809,99,2177165,97,311662,99,192689,70,34297,115,905558,98,17136,107
2020-03-05,5496453,99,2244515,100,307597,98,194386,70,33209,112,929298,101,17203,108


In [32]:
# The 14 columns alternate between an absolute count and its
# "% of Comparable Pre-Pandemic Day" companion: even positions hold the
# counts, odd positions the percentages.
count_positions = list(range(0, 14, 2))
percent_positions = list(range(1, 14, 2))

df_ridership = df.iloc[:, count_positions]
df_percent = df.iloc[:, percent_positions]

In [33]:
# Aggregate the daily tables into coarser periods: counts are summed,
# percentages are averaged. 'ME' buckets by month end, 'W-MON' by weeks
# anchored on Monday.
month_end, week_on_monday = 'ME', 'W-MON'

df_monthly_ridership = df_ridership.groupby(pd.Grouper(freq=month_end)).sum()
df_weekly_ridership = df_ridership.groupby(pd.Grouper(freq=week_on_monday)).sum()

df_monthly_percent = df_percent.groupby(pd.Grouper(freq=month_end)).mean()
df_weekly_percent = df_percent.groupby(pd.Grouper(freq=week_on_monday)).mean()

In [34]:
# Reshape the wide table into long form (one row per date and service),
# which is the layout Plotly expects.
ridership_columns = [
    'Subways: Total Estimated Ridership',
    'Buses: Total Estimated Ridership',
    'LIRR: Total Estimated Ridership',
    'Metro-North: Total Estimated Ridership',
    'Access-A-Ride: Total Scheduled Trips',
]
melted_data = pd.melt(
    df.reset_index(),
    id_vars='Date',
    value_vars=ridership_columns,
    var_name='Service',
    value_name='Ridership',
)

melted_data

Unnamed: 0,Date,Service,Ridership
0,2020-03-01,Subways: Total Estimated Ridership,2212965
1,2020-03-02,Subways: Total Estimated Ridership,5329915
2,2020-03-03,Subways: Total Estimated Ridership,5481103
3,2020-03-04,Subways: Total Estimated Ridership,5498809
4,2020-03-05,Subways: Total Estimated Ridership,5496453
...,...,...,...
8525,2024-10-27,Access-A-Ride: Total Scheduled Trips,23888
8526,2024-10-28,Access-A-Ride: Total Scheduled Trips,35666
8527,2024-10-29,Access-A-Ride: Total Scheduled Trips,38621
8528,2024-10-30,Access-A-Ride: Total Scheduled Trips,40468


In [36]:
# Distinct service labels present in the long table.
melted_data["Service"].unique()

array(['Subways: Total Estimated Ridership',
       'Buses: Total Estimated Ridership',
       'LIRR: Total Estimated Ridership',
       'Metro-North: Total Estimated Ridership',
       'Access-A-Ride: Total Scheduled Trips'], dtype=object)

In [37]:
# Keep only the two services of interest.
services = ["Subways: Total Estimated Ridership", "Access-A-Ride: Total Scheduled Trips"]
is_selected_service = melted_data["Service"].isin(services)
melted_data = melted_data.loc[is_selected_service]
melted_data

Unnamed: 0,Date,Service,Ridership
0,2020-03-01,Subways: Total Estimated Ridership,2212965
1,2020-03-02,Subways: Total Estimated Ridership,5329915
2,2020-03-03,Subways: Total Estimated Ridership,5481103
3,2020-03-04,Subways: Total Estimated Ridership,5498809
4,2020-03-05,Subways: Total Estimated Ridership,5496453
...,...,...,...
8525,2024-10-27,Access-A-Ride: Total Scheduled Trips,23888
8526,2024-10-28,Access-A-Ride: Total Scheduled Trips,35666
8527,2024-10-29,Access-A-Ride: Total Scheduled Trips,38621
8528,2024-10-30,Access-A-Ride: Total Scheduled Trips,40468


In [38]:
# Boolean mask of weekend rows (pandas numbers Monday as 0, so 5 and 6 are Sat/Sun).
melted_data["Date"].dt.dayofweek.ge(5)

0        True
1       False
2       False
3       False
4       False
        ...  
8525     True
8526    False
8527    False
8528    False
8529    False
Name: Date, Length: 3412, dtype: bool

In [39]:
# Drop Saturdays and Sundays (dayofweek 5 and 6), keeping Monday-Friday rows.
is_weekday = melted_data["Date"].dt.dayofweek.lt(5)
melted_data_weekdays = melted_data.loc[is_weekday]
melted_data_weekdays

Unnamed: 0,Date,Service,Ridership
1,2020-03-02,Subways: Total Estimated Ridership,5329915
2,2020-03-03,Subways: Total Estimated Ridership,5481103
3,2020-03-04,Subways: Total Estimated Ridership,5498809
4,2020-03-05,Subways: Total Estimated Ridership,5496453
5,2020-03-06,Subways: Total Estimated Ridership,5189447
...,...,...,...
8523,2024-10-25,Access-A-Ride: Total Scheduled Trips,36635
8526,2024-10-28,Access-A-Ride: Total Scheduled Trips,35666
8527,2024-10-29,Access-A-Ride: Total Scheduled Trips,38621
8528,2024-10-30,Access-A-Ride: Total Scheduled Trips,40468


## Basic plots

In [16]:
# Weekday subway ridership over time.
is_subway = melted_data_weekdays["Service"] == "Subways: Total Estimated Ridership"
px.line(data_frame=melted_data_weekdays.loc[is_subway], x="Date", y="Ridership")

In [8]:
# `df` is indexed by Date, and Plotly Express only resolves string arguments
# against columns, so restore Date as a column before plotting.
px.line(data_frame=df.reset_index(), x="Date", y="Subways: % of Comparable Pre-Pandemic Day")

In [9]:
# `df` is indexed by Date, and Plotly Express only resolves string arguments
# against columns, so restore Date as a column before plotting.
px.line(data_frame=df.reset_index(), x="Date", y="Bridges and Tunnels: Total Traffic")

In [10]:
# `df` is indexed by Date, and Plotly Express only resolves string arguments
# against columns, so restore Date as a column before plotting.
px.line(data_frame=df.reset_index(), x="Date", y="Bridges and Tunnels: % of Comparable Pre-Pandemic Day")

In [71]:
# Area chart of monthly subway ridership totals.
# Uses `df_monthly_ridership` (built in the aggregation cell); the previously
# referenced `monthly_sum_df` is not defined anywhere in this notebook.
fig = px.area(
    df_monthly_ridership,
    x=df_monthly_ridership.index,
    y='Subways: Total Estimated Ridership',
)
fig.show()

In [13]:
# Heatmap of subway ridership by calendar month (rows) and day of month
# (columns). Values are summed across every year present in the data.
# `pd` and `px` come from the import cell at the top of the notebook.

# `df` is indexed by Date. Work on a copy with Date restored as a column so
# the `data['Date']` lookups succeed and the shared frame is not mutated.
data = df.reset_index()
data['Date'] = pd.to_datetime(data['Date'])

# Creating a pivot table for ridership data
pivot_table = data.pivot_table(
    index=data['Date'].dt.month,  # Y-axis: months
    columns=data['Date'].dt.day,  # X-axis: days
    values='Subways: Total Estimated Ridership',  # Ridership values
    aggfunc='sum'
)

# Create heatmap
fig = px.imshow(pivot_table,
                labels=dict(x="Day", y="Month", color="Ridership"),
                x=pivot_table.columns,
                y=pivot_table.index,
                color_continuous_scale='Viridis')

fig.update_layout(title='Monthly Ridership Heatmap')
fig.show()

## Animations

In [44]:
def create_animation(variable, frequency, start=12):
    """Show an animated line chart that reveals the aggregated series period by period.

    Parameters
    ----------
    variable : str
        "ridership" plots the summed counts; any other value plots the
        "% of Comparable Pre-Pandemic Day" averages.
    frequency : str
        "monthly" uses the monthly aggregates; any other value uses the
        weekly aggregates.
    start : int, default 12
        Number of periods visible in the first animation frame.

    Raises
    ------
    ValueError
        If the selected table has no more than ``start`` rows, so that no
        animation frame can be built.

    Notes
    -----
    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Decide once which table is plotted, and derive every label from that
    # same decision so data and labels can never disagree.
    is_ridership = variable == "ridership"
    is_monthly = frequency == "monthly"
    if is_ridership:
        dfi = df_monthly_ridership if is_monthly else df_weekly_ridership
        legend_title = "Total Estimated Ridership"
        y_title = "Ridership"
    else:
        dfi = df_monthly_percent if is_monthly else df_weekly_percent
        legend_title = '% of Comparable Pre-Pandemic Day'
        y_title = "Percent [%]"

    # Column names look like "<service>: <measure>"; keep only the service.
    # Renaming before plotting (rather than patching fig.data afterwards)
    # gives the initial traces and every animation frame the same names.
    dfi = dfi.rename(columns=lambda col: col.split(":")[0])
    series_names = list(dfi.columns)

    # One cumulative slice per frame, tagged with its frame id in 'ix'.
    # Collected in a list and concatenated once instead of growing a
    # DataFrame inside the loop.
    # NOTE(review): the last frame is head(obs - 1), so the final period is
    # never drawn. Kept as-is in case the trailing partial period is being
    # hidden on purpose - confirm.
    obs = len(dfi)
    slices = [dfi.head(i).assign(ix=i) for i in range(start, obs)]
    if not slices:
        raise ValueError(
            f"need more than {start} periods to animate, got {obs}"
        )
    frames_long = pd.concat(slices)

    # plotly figure
    fig = px.line(frames_long, x=frames_long.index, y=series_names,
                  template="plotly_dark",
                  animation_frame='ix')

    # attribute adjustments
    # px only adds the play/pause menu when there is more than one frame.
    if fig.layout.updatemenus:
        fig.layout.updatemenus[0].buttons[0]['args'][1]['frame']['redraw'] = True
    fig.update_xaxes(title="", range=[frames_long.index.min(), frames_long.index.max()])
    # Fix the y-range as well so later frames are not clipped by a range
    # derived from the first frame only.
    y_top = float(np.nanmax(dfi.to_numpy(dtype=float)))
    fig.update_yaxes(title=y_title, range=[0, y_top * 1.05])
    fig.update_layout(width=1200, height=500,
                      legend=dict(font=dict(size=14, color="white")),
                      legend_title_text=legend_title,
                      legend_title=dict(font=dict(size=16, color="white")))
    fig.show()

In [46]:
# Animate the monthly ridership totals.
create_animation("ridership", "monthly")

## Dash in Jupyter

In [None]:
# Dash >= 2.11 has Jupyter support built into `dash.Dash`. The separate
# `jupyter_dash`, `dash_core_components` and `dash_html_components` packages
# are deprecated (and `jupyter_dash` is not installed in this environment),
# so everything this cell needs is imported from `dash` itself.
from dash import Dash, Input, Output, State, dcc, html

# construct a figure with frames (seeded so the demo data is reproducible)
rng = np.random.default_rng(42)
frames = [go.Frame(name=str(n), data=go.Scatter(y=rng.uniform(1, 5, 50)))
          for n in range(8)]
# No plotly-native play button or slider is attached to the figure: the Dash
# button, slider and interval below drive the animation instead.
fig = go.Figure(data=frames[0].data, frames=frames)


# Build App
app = Dash(__name__)
app.layout = html.Div(
    [dcc.Graph(id="graph", figure=fig),
     html.Button("Play", id="dashPlay", n_clicks=0),
     dcc.Slider(id="dashSlider", min=0, max=len(frames)-1, value=0, marks={i:{"label":str(i)} for i in range(len(frames))}),
     dcc.Interval(id="animateInterval", interval=400, n_intervals=0, disabled=True),
     html.Div(id="whichframe", children=[]),
    ],
)

# core update of figure on change of dash slider
@app.callback(
    Output("whichframe", "children"),
    Output("graph", "figure"),
    Input("dashSlider", "value"),
)
def setFrame(frame):
    """Return the selected frame index and a figure showing that frame."""
    if not frame:
        # Frame 0 (or no value yet): the base figure already shows frame 0.
        return 0, fig
    tfig = go.Figure(fig.frames[frame].data, frames=fig.frames, layout=fig.layout)
    # Keep a plotly-native slider in sync, if the figure has one.
    if tfig.layout.sliders:
        tfig.layout.sliders[0].active = frame
    return frame, tfig

# start / stop Interval to move through frames
@app.callback(
    Output("animateInterval", "disabled"),
    Input("dashPlay", "n_clicks"),
    State("animateInterval", "disabled"),
    # Dash runs callbacks once on page load; without this the interval would
    # be enabled (and the animation would start) before the first click.
    prevent_initial_call=True,
)
def play(n_clicks, disabled):
    """Toggle the interval timer each time the Play button is clicked."""
    return not disabled

@app.callback(
    Output("dashSlider", "value"),
    Input("animateInterval", "n_intervals"),
    State("dashSlider", "value"),
    # Do not advance the slider on page load, only on real interval ticks.
    prevent_initial_call=True,
)
def doAnimate(i, frame):
    """Advance the slider by one frame per interval tick, wrapping to 0."""
    return ((frame or 0) + 1) % len(frames)

# Run app and display result inline in the notebook
app.run(jupyter_mode="inline")