In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from dash import Dash, dcc, html, dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template
import dash_dangerously_set_inner_html
import plotly.express as px
import plotly.graph_objects as go

#For finding next free port
import socket

#Set the timeout
import os
os.environ['DASH_CALLBACK_TIMEOUT'] = '120000'  # 2 minutes

In [2]:
mta_data = pd.read_csv('./data/MTA_Daily_Ridership.csv',parse_dates=['Date'])

In [3]:
mta_data = mta_data.rename(columns={
            'Subways: Total Estimated Ridership' : 'Subways',
            'Subways: % of Comparable Pre-Pandemic Day' : 'Subways: % of Pre-Pandemic',
            'Buses: Total Estimated Ridership' : 'Buses',
            'Buses: % of Comparable Pre-Pandemic Day' : 'Buses: % of Pre-Pandemic',
            'LIRR: Total Estimated Ridership' : 'LIRR',
            'LIRR: % of Comparable Pre-Pandemic Day' : 'LIRR : % of Pre-Pandemic',
            'Metro-North: Total Estimated Ridership' : 'Metro-North',
            'Metro-North: % of Comparable Pre-Pandemic Day' : 'Metro-North: % of Pre-Pandemic',
            'Access-A-Ride: Total Scheduled Trips' : 'Access-A-Ride',
            'Access-A-Ride: % of Comparable Pre-Pandemic Day' : 'Access-A-Ride: % of Pre-Pandemic',
            'Bridges and Tunnels: Total Traffic' : 'Bridges and Tunnels',
            'Bridges and Tunnels: % of Comparable Pre-Pandemic Day' : 'Bridges and Tunnels: % of Pre-Pandemic',
            'Staten Island Railway: Total Estimated Ridership' : 'Staten Island Railway',
            'Staten Island Railway: % of Comparable Pre-Pandemic Day' : 'Staten Island Railway: % of Pre-Pandemic'
            },
            )

In [4]:
mta_data.sample(n=50)


Unnamed: 0,Date,Subways,Subways: % of Pre-Pandemic,Buses,Buses: % of Pre-Pandemic,LIRR,LIRR : % of Pre-Pandemic,Metro-North,Metro-North: % of Pre-Pandemic,Access-A-Ride,Access-A-Ride: % of Pre-Pandemic,Bridges and Tunnels,Bridges and Tunnels: % of Pre-Pandemic,Staten Island Railway,Staten Island Railway: % of Pre-Pandemic
1311,2023-10-03,4008201,70,1471939,65,235305,75,214111,74,32295,108,937269,101,7953,45
1509,2024-04-18,3966226,71,1303010,60,230634,74,203859,71,34816,120,955479,102,7271,45
218,2020-10-05,1678792,29,1102675,49,88694,28,42257,15,19971,67,783540,84,3587,20
390,2021-03-26,1938246,35,1091753,49,84436,27,49529,18,20805,70,865970,94,3536,22
432,2021-05-07,2238033,39,1222741,54,101637,32,61477,21,22730,77,936747,97,3827,22
253,2020-11-09,1752614,31,1111203,51,91681,28,42627,15,21444,69,773216,82,3561,21
496,2021-07-10,1781717,63,855908,63,72964,57,64429,42,14518,91,925950,100,2204,43
607,2021-10-29,3251779,57,1426938,63,153317,49,122632,42,22477,75,963442,104,6597,37
1455,2024-02-24,2308808,80,788962,62,114816,123,98942,76,19456,120,866204,104,2407,56
1673,2024-09-29,1959363,77,664367,60,117572,118,102805,98,22340,130,869345,98,2782,95


The following is The code for the Dash app I will keep it in one cell

In [12]:
mta_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1706 entries, 0 to 1705
Data columns (total 15 columns):
 #   Column                                    Non-Null Count  Dtype         
---  ------                                    --------------  -----         
 0   Date                                      1706 non-null   datetime64[ns]
 1   Subways                                   1706 non-null   int64         
 2   Subways: % of Pre-Pandemic                1706 non-null   int64         
 3   Buses                                     1706 non-null   int64         
 4   Buses: % of Pre-Pandemic                  1706 non-null   int64         
 5   LIRR                                      1706 non-null   int64         
 6   LIRR : % of Pre-Pandemic                  1706 non-null   int64         
 7   Metro-North                               1706 non-null   int64         
 8   Metro-North: % of Pre-Pandemic            1706 non-null   int64         
 9   Access-A-Ride                 

In [51]:
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"
services = ['Subways',
            'Buses',
            'LIRR',
            'Metro-North',
            'Access-A-Ride',
            'Bridges and Tunnels',
            'Staten Island Railway']

# Set the colours for all charts
full_colours = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
# Old Grey = #B3B3B3
colours = ['#1f77b4', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#CCCCCC']
colours_pct = ['#CCCCCC', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#e377c2']

services = ['Subways',
            'Buses',
            'LIRR',
            'Metro-North',
            'Access-A-Ride',
            'Bridges and Tunnels',
            'Staten Island Railway']

app = Dash(
    __name__, external_stylesheets=[dbc.themes.PULSE, dbc_css]
) 

server = app.server 

def create_title():
    """ 
    Creates the report title
    
    Returns: 
        the dbc.Col array with children that holds the html code for the title
    """
    return [
                dbc.Col(
                    [
                        html.H2(
                            id="report_title",
                            className="bg-primary text-white p-2 mb-2 text-center",
                        )
                    ]
                )
            ]
    
def create_granularity_dropdown():
    """
    Creates the granulatity dropdown

    Returns: 
        the dbc.Card array with children that holds the dropdown
    """
    return [
        dbc.Card(
            [ 
                dcc.Markdown("Select A Report Granularity:"),
                dcc.Dropdown(
                    ['Month',
                     'Quarter',
                     'Year'                     
                    ],
                    'Month',
                    id="granularity_dropdown",                                                                        
                    multi=False,
                    className="dbc",
                    clearable=False
                ),
             ],
             style={"border": "none"},
         )       
    ]     

def create_services_dropdown():
    """
    Creates the services dropdown

    Returns: 
        the dbc.Card array with children that holds the dropdown
    """    
    return [
        dbc.Card(
            [ 
                dcc.Markdown("Select A Transport Mode:"),
                dcc.Dropdown(
                    options=[
                        {"label": service, "value": service}
                        for service in services
                    ],
                    value="all_values",
                    id="services_dropdown",                                                                        
                    multi=True,
                    className="dbc",
                ),
             ],
             style={"border": "none"},
         )       
    ]




def create_sparkline(service: list) -> go.Figure:
    max_date = mta_data["Date"].max()
    #min_date = (max_date - pd.DateOffset(years=1)).date()
    print(f'min_date type {type(min_date)}')
    print(f'min_date: {min_date}')
    last_year_data = mta_data[mta_data["Date"] > max_date - pd.DateOffset(years=1)]

    sparkline_figure = go.Figure()
    sparkline_figure.add_trace(
        go.Scatter(
            x=last_year_data,
            y=last_year_data[service],
            mode='lines',
            line=dict(width=2, color='blue'),
            showlegend=False,
        )
    )
    sparkline_figure.update_layout(
        height=80,
        margin=dict(l=10, r=10, t=10, b=10),
        xaxis=dict(visible=False),
        yaxis=dict(visible=False)
    )
    return sparkline_figure

def create_cards(card_services: list):
    return [
        dbc.Col(
            dbc.Card([
                dbc.CardBody([
                    dbc.CardHeader(f"{service} Ridership", className="text-center"),
                    html.P(["Last Year Trend"], className="text-center"),
                    dcc.Graph(
                        figure=create_sparkline(service),  # Your sparkline function
                        config={'displayModeBar': False}
                    ),
                ]),
            ]),
            width=4  # Adjust as needed
        )
        for service in card_services[:3]
    ]
    
    
app.layout = dbc.Container([
    dcc.Store(id='selected_services_store'),
    dbc.Row(create_title()),  # Title row
    dbc.Row([
        dbc.Col(create_granularity_dropdown()),
        dbc.Col(create_services_dropdown()),
    ]),
    dbc.Row(id='card_row'),  # Placeholder for cards
    dbc.Row([
        dbc.Col(
            dcc.Graph(id='service_line_chart')  # Graph Component
        )
    ]),
])

def create_thousand_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Divides the ridership value by 1,000 to make the numbers easier to read

    Args:
    df: The dataframe to process

    Returns:
    df_thousands: The dataframe with the adjusted figures
    """
    
    df_thousands = df.copy()
    columns_to_divide = [
        'Subways', 
        'Buses', 
        'LIRR', 
        'Metro-North',
        'Access-A-Ride',
        'Bridges and Tunnels',
        'Staten Island Railway'
    ]
    # Perform the division and update only those columns
    df_thousands[columns_to_divide] = df[columns_to_divide] / 1000
    return df_thousands

def get_resample_value(granularity: str) -> str:   
    """
    Returns the resample value based on the selected granularity.    

    Args: 
        granularity: the string returned from the granularity dropdown

    Returns:
        selected granularity code to pass to resample. 
    """
    
    match granularity:
        case "Year":
            return "YE"
        case "Month":
            return "ME"
        case "Quarter":
            return "QE"
        case "Week":
            return "W"
        case _:
            return "ME"  # Default case for anything not matched


def resample_data(df: pd.DataFrame, granularity: str) -> pd.DataFrame:
    """
    Resample the dataframe to the selected granularity

    Args:
    df: The dataframe to resample
    granularity: The level of detail to use

    Returns
    resampled_df: The dataframe resampled to the specified granularity
    """  
    
    resample_value = get_resample_value(granularity)    
    resampled_df = df.resample(resample_value, on='Date').mean()    
    resampled_df.reset_index(inplace=True)
    if granularity == 'Year':
        resampled_df['Year'] = resampled_df['Date'].dt.year.astype(str) # Convert to string for better looking axis labels    
    return resampled_df




    
def create_service_line_chart(granular_data: pd.DataFrame, granularity: str, selected_services: list) -> go.Figure:
    """
    Create a Plotly line chart for service data based on selected granularity.

    Args:
        granular_data: The resampled dataframe.
        granularity: The selected granularity level.

    Returns:
        fig: A Plotly Graph Objects figure.
    """
    fig = go.Figure()
    print("Services")
    print(services)
    print('*'*80)
    # Create the chart for each service
    for service in selected_services:
        if service in granular_data.columns:
            if granularity == 'Year':
                fig.add_trace(
                    go.Scatter(                    
                        x=granular_data['Year'],
                        y=granular_data[service],
                        mode='lines',
                        name=service,
                        line=dict(color=full_colours[services.index(service)]),
                    )
                )    
            else:
                fig.add_trace(
                    go.Scatter(                    
                        x=granular_data['Date'],
                        y=granular_data[service],
                        mode='lines',
                        name=service,
                        line=dict(color=full_colours[services.index(service)]),
                    )
                )

    # Set chart title and axis labels
    fig.update_layout(
        title=f'{granularity} Ridership by Transportation Type',
        xaxis_title="Date",
        yaxis_title="Average Ridership (Thousands)",
        template="plotly_white",
    )

    # Format the X and Y axes
    fig.update_xaxes(showgrid=True, gridcolor="lightgrey")
    fig.update_yaxes(showgrid=True, gridcolor="lightgrey")

    return fig
   
  
def find_free_port(start_port=8700, max_port=8800):
    """   
    Finds the next available port starting from `start_port` up to `max_port`.    

    Args:
    start_port: the start of the port range. Default: 8700
    end_port: the end of the port range. Default: 8800

    Returns:
    port: The next available port in the range

    Raises:
    RuntimeError: If no free ports are available in the specified range.
    """
    
    for port in range(start_port, max_port):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))  # Try binding to the port
                return port
            except OSError:
                continue  # Port is in use, try the next one
    raise RuntimeError("No free ports available in the specified range.")

@app.callback(
    Output('card_row', 'children'),  # Update the row with new cards
    Input('selected_services_store', 'data')  # Use the stored value as input
)
def update_cards(selected_services):
    if not selected_services:
        selected_services = services  # Default values
    return create_cards(selected_services)


@app.callback(
    [Output('report_title', 'children'),     
     Output('service_line_chart', 'figure'),
     Output('selected_services_store', 'data')],
    [Input('granularity_dropdown', 'value'),
     Input('services_dropdown', 'value'),
     Input('selected_services_store', 'data')],
    prevent_initial_call='initial_duplicate'
)
def display_information(granularity_dropdown_value,service_dropdown_value,selected_services):
    title = "MTA Ridership Dashboard"

    selected_services = service_dropdown_value if service_dropdown_value else services
    #selected_card_services = service_dropdown_value if service_dropdown_value else ['Subways','Buses','LIRR']
    # Resample data based on the selected granularity
    mta_thousands = create_thousand_dataframe(mta_data)
    granular_data = resample_data(mta_thousands, granularity_dropdown_value)

    
    # Pass resampled data to the chart function
    service_fig = create_service_line_chart(granular_data, granularity_dropdown_value, selected_services)

    return title, service_fig, selected_services
    

if __name__ == "__main__":   
    free_port = find_free_port()   
    print(f'Port used = {free_port}')
    app.run_server(debug=True, mode="inline", port=free_port) # Used for deployment in Jupyter Notebook
    #app.run_server(debug=True) # Used for deployment to Docker


Port used = 8712


Services
['Subways', 'Buses', 'LIRR', 'Metro-North', 'Access-A-Ride', 'Bridges and Tunnels', 'Staten Island Railway']
********************************************************************************
Services
['Subways', 'Buses', 'LIRR', 'Metro-North', 'Access-A-Ride', 'Bridges and Tunnels', 'Staten Island Railway']
********************************************************************************
min_date type <class 'datetime.date'>
min_date: 2023-10-31
min_date type <class 'datetime.date'>
min_date: 2023-10-31
min_date type <class 'datetime.date'>
min_date: 2023-10-31
Services
['Subways', 'Buses', 'LIRR', 'Metro-North', 'Access-A-Ride', 'Bridges and Tunnels', 'Staten Island Railway']
********************************************************************************
min_date type <class 'datetime.date'>
min_date: 2023-10-31
Services
['Subways', 'Buses', 'LIRR', 'Metro-North', 'Access-A-Ride', 'Bridges and Tunnels', 'Staten Island Railway']
*************************************************