In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from dash import Dash, dcc, html, dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template
import dash_dangerously_set_inner_html
import plotly.express as px
import plotly.graph_objects as go

#For finding next free port
import socket

# Set the callback timeout through an environment variable.
# The value is stored as a string; 120000 is documented here as 2 minutes,
# i.e. the unit is milliseconds.
# NOTE(review): confirm the installed Dash version actually reads DASH_CALLBACK_TIMEOUT.
import os
os.environ['DASH_CALLBACK_TIMEOUT'] = '120000'  # 2 minutes

In [2]:
# Load the daily MTA ridership CSV; the 'Date' column is parsed into datetimes on read.
mta_data = pd.read_csv(
    './data/MTA_Daily_Ridership.csv',
    parse_dates=['Date'],
)

In [3]:
# Shorten the verbose source headers: each "<mode>: Total ..." column becomes just
# the mode name, and each "... % of Comparable Pre-Pandemic Day" column becomes
# "<mode>: % of Pre-Pandemic".
mta_data = mta_data.rename(
    columns={
        # Subways
        'Subways: Total Estimated Ridership': 'Subways',
        'Subways: % of Comparable Pre-Pandemic Day': 'Subways: % of Pre-Pandemic',
        # Buses
        'Buses: Total Estimated Ridership': 'Buses',
        'Buses: % of Comparable Pre-Pandemic Day': 'Buses: % of Pre-Pandemic',
        # Long Island Rail Road
        'LIRR: Total Estimated Ridership': 'LIRR',
        # NOTE(review): this target name has a space before the colon, unlike the
        # other "% of Pre-Pandemic" columns. Left as-is because other cells may
        # reference it by this exact name - confirm before normalising.
        'LIRR: % of Comparable Pre-Pandemic Day': 'LIRR : % of Pre-Pandemic',
        # Metro-North
        'Metro-North: Total Estimated Ridership': 'Metro-North',
        'Metro-North: % of Comparable Pre-Pandemic Day': 'Metro-North: % of Pre-Pandemic',
        # Access-A-Ride
        'Access-A-Ride: Total Scheduled Trips': 'Access-A-Ride',
        'Access-A-Ride: % of Comparable Pre-Pandemic Day': 'Access-A-Ride: % of Pre-Pandemic',
        # Bridges and Tunnels
        'Bridges and Tunnels: Total Traffic': 'Bridges and Tunnels',
        'Bridges and Tunnels: % of Comparable Pre-Pandemic Day': 'Bridges and Tunnels: % of Pre-Pandemic',
        # Staten Island Railway
        'Staten Island Railway: Total Estimated Ridership': 'Staten Island Railway',
        'Staten Island Railway: % of Comparable Pre-Pandemic Day': 'Staten Island Railway: % of Pre-Pandemic',
    }
)

The following is the code for the Dash app. I will keep it in one cell.

In [4]:
# Extra stylesheet from dash-bootstrap-templates, loaded alongside the PULSE theme.
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# Transport modes shown in the report. Defined once: the order matters because
# full_colours is indexed by a service's position in this list.
services = ['Subways',
            'Buses',
            'LIRR',
            'Metro-North',
            'Access-A-Ride',
            'Bridges and Tunnels',
            'Staten Island Railway']

# Set the colours for all charts (one entry per service, in the same order)
full_colours = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
# Old Grey = #B3B3B3
# Mostly-grey palettes that leave only a few services in colour
colours = ['#1f77b4', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#CCCCCC']
colours_pct = ['#CCCCCC', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#e377c2']

app = Dash(
    __name__, external_stylesheets=[dbc.themes.PULSE, dbc_css]
)

# Expose the app's underlying server object at module level
# (presumably for the Docker deployment mentioned at the bottom of this cell - confirm).
server = app.server

def create_title():
    """
    Builds the report title element.

    The heading is created without any text; it only carries the id
    "report_title" and its Bootstrap styling classes.

    Returns:
        a one-element list holding the dbc.Col that wraps the html.H2 heading
    """
    heading = html.H2(
        id="report_title",
        className="bg-primary text-white p-2 mb-2 text-center",
    )
    title_column = dbc.Col([heading])
    return [title_column]
    
def create_granularity_dropdown():
    """
    Builds the granularity selector.

    Returns:
        a one-element list holding a borderless dbc.Card that contains the
        prompt text and a single-select dropdown defaulting to 'Monthly'
    """
    granularity_options = ['Monthly', 'Quarterly', 'Yearly', 'Daily']

    prompt = dcc.Markdown("Select A Report Granularity:")
    selector = dcc.Dropdown(
        options=granularity_options,
        value='Monthly',
        id="granularity_dropdown",
        multi=False,
        className="dbc",
    )

    card = dbc.Card([prompt, selector], style={"border": "none"})
    return [card]

def create_services_dropdown():
    """
    Creates the services dropdown

    Returns:
        the dbc.Card array with children that holds the dropdown
    """
    # NOTE: a stray second triple-quote used to follow the docstring here. It
    # opened an unterminated string that swallowed this function's body and
    # broke parsing of the rest of the cell.
    return [
        dbc.Card(
            [
                dcc.Markdown("Select A Transport Mode:"),
                dcc.Dropdown(
                    options=[
                        {"label": service, "value": service}
                        for service in services
                    ],
                    # NOTE(review): "all_values" is not one of the options and,
                    # with multi=True, a list would normally be expected here.
                    # Kept unchanged because the callback receives this value -
                    # confirm the intended default.
                    value="all_values",
                    id="services_dropdown",
                    multi=True,
                    className="dbc",
                ),
            ],
            style={"border": "none"},
        )
    ]

       
# Page layout, top to bottom: title row, row with the two dropdowns side by side,
# then the row holding the line chart.
app.layout = dbc.Container(
    [
        # Title
        dbc.Row(create_title()),
        # Controls
        dbc.Row(
            [
                dbc.Col(create_granularity_dropdown()),
                dbc.Col(create_services_dropdown()),
            ]
        ),
        # Chart
        dbc.Row(
            [
                dbc.Col([dcc.Graph(id='service_line_chart')]),
            ]
        ),
    ]
)

def create_thousand_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Returns a copy of the dataframe with the ridership columns expressed in thousands.

    Only the seven transport-mode columns are scaled; every other column
    (dates, percentages) is copied through unchanged. The input is not modified.

    Args:
    df: The dataframe to process

    Returns:
    The copied dataframe with the scaled figures
    """
    ridership_columns = [
        'Subways',
        'Buses',
        'LIRR',
        'Metro-North',
        'Access-A-Ride',
        'Bridges and Tunnels',
        'Staten Island Railway',
    ]
    scaled = df.copy()
    # Overwrite just the ridership columns on the copy
    scaled[ridership_columns] = df[ridership_columns].div(1000)
    return scaled

def get_resample_value(granularity: str) -> str:   
    """
    Returns the resample value based on the selected granularity.    

    Args: 
        granularity: the string returned from the granularity dropdown

    Returns:
        selected granularity code to pass to resample. 
    """
    match granularity:
        case "Year":
            return "YE"
        case "Month":
            return "ME"
        case "Quarter":
            return "QE"
        case "Week":
            return "W"
        case _:
            return "ME"  # Default case for anything not matched

def resample_data(df: pd.DataFrame, granularity: str) -> pd.DataFrame:
    """
    Resample the dataframe to the selected granularity

    Each period holds the mean of the rows that fall in it, and 'Date' is
    returned as a regular column again rather than as the index.

    Args:
    df: The dataframe to resample; it must have a 'Date' column
    granularity: The level of detail to use

    Returns
    resampled_df: The dataframe resampled to the specified granularity. For
    yearly data an extra string column 'Year' is added.
    """
    resample_value = get_resample_value(granularity)
    # reset_index() returns a new frame, so no in-place mutation is needed
    resampled_df = df.resample(resample_value, on='Date').mean().reset_index()
    # Test the resolved resample code rather than the raw label, so the Year
    # column is added for whichever label get_resample_value maps to yearly.
    if resample_value == "YE":
        # Convert to string for better looking axis labels
        resampled_df['Year'] = resampled_df['Date'].dt.year.astype(str)
    return resampled_df

def create_service_line_chart(df: pd.DataFrame) -> go.Figure:
    """
    Draw one line per transport mode on a single Plotly figure.

    Args:
    df: The dataframe to plot; it needs a 'Date' column and one column for
        each entry in the module-level `services` list

    Returns:
    fig: The assembled Plotly Graph Objects figure
    """
    fig = go.Figure()

    # Each service takes the colour at the same position in full_colours
    for position, service in enumerate(services):
        service_trace = go.Scatter(
            x=df['Date'],
            y=df[service],
            mode='lines',
            name=service,
            line={'color': full_colours[position]},
        )
        fig.add_trace(service_trace)

    # Chart title, axis titles and theme
    fig.update_layout(
        title="Monthly Ridership by Transportation Type",
        xaxis_title="Date",
        yaxis_title="Average Ridership (Thousands)",
        template="plotly_white",
    )

    # Both axes share the light grey grid; the x axis also gets angled year ticks
    grid_style = {'showgrid': True, 'gridcolor': "lightgrey"}
    fig.update_xaxes(tickangle=45, tickformat="%Y", **grid_style)
    fig.update_yaxes(**grid_style)

    return fig
    
    

def find_free_port(start_port=8700, max_port=8800):
    """
    Finds the first port in [start_port, max_port) that can be bound on 127.0.0.1.

    Each candidate is probed by binding a throwaway TCP socket to it; the
    socket is closed again before the function returns.

    Args:
    start_port: the first port to try. Default: 8700
    max_port: the end of the port range; this port itself is not tried. Default: 8800

    Returns:
    port: The first port in the range that could be bound

    Raises:
    RuntimeError: If no free ports are available in the specified range.
    """
    for candidate in range(start_port, max_port):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(("127.0.0.1", candidate))
        except OSError:
            # Port is in use, move on to the next one
            continue
        else:
            return candidate
        finally:
            probe.close()
    raise RuntimeError("No free ports available in the specified range.")
    
@app.callback(
    [Output('report_title', 'children'),
     Output('service_line_chart', 'figure')],
    [Input('granularity_dropdown', 'value'),
     Input('services_dropdown', 'value')]
)
def display_information(granularity_dropdown_value, service_dropdown_value):
    """
    Code to create the visuals

    Previously the services selection was read and then discarded, so the
    services dropdown had no effect on the chart. The selection is now
    applied by hiding the traces of services that were not selected.

    Args:
    granularity_dropdown_value: The selected granularity from the granularity dropdown
    service_dropdown_value: The selected service / services from the service_dropdown.
        May be None, an empty list, a list of service names, or a single string.

    Returns:
    title: The report title
    service_fig: The line chart showing the MTA transport modes
    """
    # Normalise the selection to a list of known service names. A bare string
    # (such as the dropdown's initial "all_values") is wrapped first.
    if isinstance(service_dropdown_value, str):
        service_dropdown_value = [service_dropdown_value]
    requested = service_dropdown_value or []
    selected_services = [service for service in services if service in requested]
    if not selected_services:
        # Nothing (or nothing recognisable) selected: show every service
        selected_services = services

    title = "MTA Ridership Dashboard"

    # Process data
    mta_thousands = create_thousand_dataframe(mta_data)
    granular_data = resample_data(mta_thousands, granularity_dropdown_value)

    # Generate the line chart, then show only the selected services.
    # Traces are named after their service, so the name identifies them.
    service_fig = create_service_line_chart(granular_data)
    service_fig.for_each_trace(
        lambda trace: trace.update(visible=trace.name in selected_services)
    )
    return title, service_fig
    

# Start the app only when this code runs as the main module.
if __name__ == "__main__":
    # Pick an unused local port so re-running the cell does not collide with a
    # server left over from an earlier run.
    free_port = find_free_port()
    # NOTE(review): mode="inline" and run_server() depend on the installed Dash
    # version - confirm they are still accepted by the version in use.
    app.run_server(debug=True, mode="inline", port=free_port) # Used for deployment in Jupyter Notebook
    #app.run_server(debug=True) # Used for deployment to Docker


Selected granularity: Monthly
Selected services: all_values
Data after scaling:         Date   Subways  Subways: % of Pre-Pandemic     Buses  \
0 2020-03-01  2212.965                          97   984.908   
1 2020-03-02  5329.915                          96  2209.066   
2 2020-03-03  5481.103                          98  2228.608   
3 2020-03-04  5498.809                          99  2177.165   
4 2020-03-05  5496.453                          99  2244.515   

   Buses: % of Pre-Pandemic     LIRR  LIRR : % of Pre-Pandemic  Metro-North  \
0                        99   86.790                       100       55.825   
1                        99  321.569                       103      180.701   
2                        99  319.727                       102      190.648   
3                        97  311.662                        99      192.689   
4                       100  307.597                        98      194.386   

   Metro-North: % of Pre-Pandemic  Access-A-Ride  \
0       