In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from dash import Dash, dcc, html, dash_table
import dash_bootstrap_components as dbc
from dash.dependencies import Output, Input
from dash.exceptions import PreventUpdate
from dash_bootstrap_templates import load_figure_template
import dash_dangerously_set_inner_html
import plotly.express as px
import plotly.graph_objects as go

#For finding next free port
import socket

# Library to store data in dcc.Store
import json
from io import StringIO # This is to solve the problem of future pandas versions removing the ability to directly pass a JSON string to read_json
#Set the timeout
#import os
#os.environ['DASH_CALLBACK_TIMEOUT'] = '120000'  # 2 minutes

In [2]:
# Load the daily ridership extract; 'Date' is parsed as datetime because the
# dashboard later resamples on that column.
mta_data = pd.read_csv(
    './data/MTA_Daily_Ridership.csv',
    parse_dates=['Date'],
)

In [3]:
# Shorten the verbose source headers. Every count column becomes the bare
# service name and every percentage column becomes
# "<service>: % of Pre-Pandemic" (the LIRR label previously carried a stray
# space before the colon, breaking that pattern).
mta_data = mta_data.rename(
    columns={
        'Subways: Total Estimated Ridership': 'Subways',
        'Subways: % of Comparable Pre-Pandemic Day': 'Subways: % of Pre-Pandemic',
        'Buses: Total Estimated Ridership': 'Buses',
        'Buses: % of Comparable Pre-Pandemic Day': 'Buses: % of Pre-Pandemic',
        'LIRR: Total Estimated Ridership': 'LIRR',
        'LIRR: % of Comparable Pre-Pandemic Day': 'LIRR: % of Pre-Pandemic',
        'Metro-North: Total Estimated Ridership': 'Metro-North',
        'Metro-North: % of Comparable Pre-Pandemic Day': 'Metro-North: % of Pre-Pandemic',
        'Access-A-Ride: Total Scheduled Trips': 'Access-A-Ride',
        'Access-A-Ride: % of Comparable Pre-Pandemic Day': 'Access-A-Ride: % of Pre-Pandemic',
        'Bridges and Tunnels: Total Traffic': 'Bridges and Tunnels',
        'Bridges and Tunnels: % of Comparable Pre-Pandemic Day': 'Bridges and Tunnels: % of Pre-Pandemic',
        'Staten Island Railway: Total Estimated Ridership': 'Staten Island Railway',
        'Staten Island Railway: % of Comparable Pre-Pandemic Day': 'Staten Island Railway: % of Pre-Pandemic',
    },
)

The following is the code for the Dash app. I will keep it in one cell.

In [13]:
# Extra stylesheet from dash-bootstrap-templates, loaded next to the Bootstrap theme.
dbc_css = "https://cdn.jsdelivr.net/gh/AnnMarieW/dash-bootstrap-templates/dbc.min.css"

# Transportation types shown in the dashboard. Defined once: the position of a
# service in this list is also its index into the colour lists below.
services = ['Subways',
            'Buses',
            'LIRR',
            'Metro-North',
            'Access-A-Ride',
            'Bridges and Tunnels',
            'Staten Island Railway']

# Set the colours for all charts (one entry per service, same order as `services`)
full_colours = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
# Old Grey = #B3B3B3
# Variants of the palette with most services greyed out
colours = ['#1f77b4', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#CCCCCC']
colours_pct = ['#CCCCCC', '#CCCCCC', '#CCCCCC', '#CCCCCC', '#9467bd', '#CCCCCC', '#e377c2']

# Font Colours
dark_blue = '#134770'
dark_orange = '#D35940'

# Dash application styled with the PULSE Bootstrap theme plus the dbc stylesheet.
app = Dash(__name__, external_stylesheets=[dbc.themes.PULSE, dbc_css])

# Module-level handle to the app's server object.
server = app.server

def create_title():
    """
    Build the content of the report title row.

    Returns:
        A one-element list holding a dbc.Col that wraps an html.H2 with
        id "report_title"; the heading is created without any text.
    """
    heading = html.H2(
        id="report_title",
        className="bg-primary text-white p-2 mb-2 text-center",
    )
    return [dbc.Col([heading])]
    
def create_granularity_dropdown():
    """
    Build the granularity selector.

    Returns:
        A one-element list holding a borderless dbc.Card with a prompt and a
        single-select, non-clearable dropdown (id "granularity_dropdown")
        offering Month, Quarter and Year, with Month preselected.
    """
    prompt = dcc.Markdown("Select A Report Granularity:")
    selector = dcc.Dropdown(
        ['Month', 'Quarter', 'Year'],
        'Month',
        id="granularity_dropdown",
        multi=False,
        className="dbc",
        clearable=False,
    )
    return [dbc.Card([prompt, selector], style={"border": "none"})]

def create_services_dropdown():
    """
    Build the transportation-type selector.

    Returns:
        A one-element list holding a borderless dbc.Card with a prompt and a
        multi-select dropdown (id "services_dropdown") that lists every entry
        of `services`. Its initial value is the string "all_services".
    """
    prompt = dcc.Markdown("Select A Transportation Type:")
    service_options = [dict(label=name, value=name) for name in services]
    selector = dcc.Dropdown(
        options=service_options,
        value="all_services",
        id="services_dropdown",
        multi=True,
        className="dbc",
    )
    return [dbc.Card([prompt, selector], style={"border": "none"})]




def create_sparkline(granular_data: pd.DataFrame, service: str, granularity: str, metrics: dict) -> go.Figure:
    """
    Build a small, axis-free line chart of the most recent data for one service.

    Args:
        granular_data: Resampled dataframe with a 'Date' column, a column named
            after `service`, and a 'Year' column when `granularity` is 'Year'.
        service: Name of the service column to plot.
        granularity: 'Year' selects the window through the 'Year' column (the
            latest year and the one before it); any other value keeps the
            rows dated within one year of the latest 'Date'.
        metrics: Mapping of service name to a dict holding 'percent_change';
            its sign picks the line colour.

    Returns:
        The sparkline as a Plotly figure.
    """
    if granularity == 'Year':
        # resample_data writes 'Year' as text, while a frame restored from JSON
        # may hold integers; coerce so the "- 1" arithmetic works for both.
        years = pd.to_numeric(granular_data['Year'])
        last_year_data = granular_data[years >= years.max() - 1]
    else:
        max_date = granular_data["Date"].max()
        last_year_data = granular_data[granular_data["Date"] > max_date - pd.DateOffset(years=1)]

    # Non-negative change is drawn in blue, a decline in orange.
    percent_change = metrics[service]['percent_change']
    line_colour = dark_blue if percent_change >= 0 else dark_orange

    sparkline_figure = go.Figure()
    sparkline_figure.add_trace(
        go.Scatter(
            x=last_year_data['Date'],  # x-axis is the Date
            y=last_year_data[service],  # y-axis is the ridership value
            mode='lines',
            line=dict(width=2, color=line_colour),
            showlegend=False,
            text=last_year_data[service].apply(lambda x: f"{x:,.0f}"),  # Format text to display ridership values
            hovertemplate='%{text}<extra></extra>',  # Display the ridership value on hover
        )
    )
    sparkline_figure.update_layout(
        height=80,
        margin=dict(l=10, r=10, t=10, b=10),
        xaxis=dict(visible=False),
        yaxis=dict(visible=False),
        plot_bgcolor='rgba(0,0,0,0)',  # Transparent background for plot area
        paper_bgcolor='rgba(0,0,0,0)',  # Transparent background for whole figure
    )
    return sparkline_figure


def create_cards(granular_data, selected_services, granularity, metrics):
    """
    Build the summary cards for the first three selected services.

    Args:
        granular_data: Resampled dataframe handed on to create_sparkline.
        selected_services: Service names; only the first three get a card.
        granularity: Selected granularity, used in the card caption and passed
            to create_sparkline.
        metrics: Mapping of service name to a dict with
            'ridership_last_period' and 'percent_change'.

    Returns:
        A list of dbc.Col components (width 4), one per card.
    """
    arrow_up = chr(8593)  # Upward arrow (↑)
    arrow_down = chr(8595)  # Downward arrow (↓)
    card_columns = []

    for service in selected_services[:3]:
        service_metrics = metrics[service]
        ridership_last_period = service_metrics['ridership_last_period']
        percent_change = service_metrics['percent_change']

        # Rising values are blue with an up arrow, falling values orange with a
        # down arrow; anything else stays uncoloured with no arrow.
        if percent_change > 0:
            accent, arrow_suffix = dark_blue, f' {arrow_up}'
        elif percent_change < 0:
            accent, arrow_suffix = dark_orange, f' {arrow_down}'
        else:
            accent, arrow_suffix = None, ''

        change_style = {'margin-top': '0', 'margin-bottom': '0.2em'}
        value_style = {'font-size': '4em',
                       'font-weight': 'bold',
                       'margin-top': '0',
                       'margin-bottom': '0.2em'}
        if accent is not None:
            change_style['color'] = accent
            value_style['color'] = accent
        change_text = f'% Change: {percent_change:.1f}%{arrow_suffix}'

        card_body = dbc.CardBody([
            html.H5(service, className='card-title'),
            html.P(f'Avg {granularity}ly Ridership', style={'margin-bottom': '0.2em'}),
            html.P(ridership_last_period, className='card-text', style=value_style),
            html.P(change_text, className='card-text', style=change_style),
            dcc.Graph(
                figure=create_sparkline(granular_data, service, granularity, metrics),
                config={'displayModeBar': False},
            ),
        ])
        card_columns.append(dbc.Col(dbc.Card(card_body), width=4))

    return card_columns
    
# Page layout, top to bottom: data stores, title, the two dropdowns,
# the card row and the line chart.
app.layout = dbc.Container(
    children=[
        # dcc.Store components that hand data from one callback to the other.
        *[
            dcc.Store(id=store_id)
            for store_id in (
                'selected_services_store',
                'granular_data_json_store',
                'granularity_store',
                'metrics_store',
            )
        ],
        dbc.Row(children=create_title()),  # Title row
        dbc.Row(
            children=[
                dbc.Col(children=create_granularity_dropdown()),
                dbc.Col(children=create_services_dropdown()),
            ]
        ),
        dbc.Row(id='card_row'),  # Filled in by the update_cards callback
        dbc.Row(
            children=[
                dbc.Col(children=dcc.Graph(id='service_line_chart'), width=11),
            ]
        ),
    ]
)

def create_thousand_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Express the ridership counts in thousands.

    Args:
        df: Dataframe containing the seven service count columns.

    Returns:
        A copy of `df` in which each service column has been divided by 1,000
        and rounded to zero decimals; all other columns are left as they were.
    """
    ridership_columns = [
        'Subways',
        'Buses',
        'LIRR',
        'Metro-North',
        'Access-A-Ride',
        'Bridges and Tunnels',
        'Staten Island Railway',
    ]
    scaled = df.copy()
    # Only the count columns are rescaled; the input frame is not modified.
    scaled[ridership_columns] = df[ridership_columns].div(1000).round(0)
    return scaled

def get_resample_value(granularity: str) -> str:
    """
    Translate a granularity label into the code handed to resample.

    Args:
        granularity: The label chosen in the granularity dropdown.

    Returns:
        "YE" for Year, "ME" for Month, "QE" for Quarter, "W" for Week, and
        "ME" for any other value.
    """
    known_codes = (
        ("Year", "YE"),
        ("Month", "ME"),
        ("Quarter", "QE"),
        ("Week", "W"),
    )
    # First label equal to the input wins; unknown labels fall back to monthly.
    return next(
        (code for label, code in known_codes if granularity == label),
        "ME",
    )


def resample_data(df: pd.DataFrame, granularity: str) -> pd.DataFrame:
    """
    Resample the dataframe to the selected granularity.

    Each period holds the mean of its rows, rounded and cast to int.

    Args:
        df: The dataframe to resample; it must have a 'Date' column.
        granularity: The level of detail to use (see get_resample_value).

    Returns:
        resampled_df: The dataframe resampled to the specified granularity,
        with 'Date' restored as a regular column. When `granularity` is
        'Year' a text 'Year' column is added as well.
    """
    resample_value = get_resample_value(granularity)

    resampled_df = (
        df.resample(resample_value, on='Date')
        .mean()         # Aggregate each period with the mean
        .round()        # Round before the cast so values are not truncated
        .astype(int)
        .reset_index()  # Make 'Date' a column again
    )

    # If the granularity is Year, add the 'Year' column
    if granularity == 'Year':
        resampled_df['Year'] = resampled_df['Date'].dt.year.astype(str)  # Use str for better axis label formatting

    return resampled_df

    
def create_service_line_chart(granular_data: pd.DataFrame, granularity: str, selected_services: list) -> go.Figure:
    """
    Draw one line per selected service from the resampled data.

    Args:
        granular_data: The resampled dataframe.
        granularity: The selected granularity level; 'Year' plots against the
            'Year' column, anything else against 'Date'.
        selected_services: Services to draw. Names that are not columns of
            `granular_data` are skipped.

    Returns:
        fig: A Plotly Graph Objects figure.
    """
    x_column = 'Year' if granularity == 'Year' else 'Date'

    fig = go.Figure()
    for service in selected_services:
        if service not in granular_data.columns:
            continue
        trace = go.Scatter(
            x=granular_data[x_column],
            y=granular_data[service],
            mode='lines',
            name=service,
            # Colour is looked up by the service's position in `services`.
            line=dict(color=full_colours[services.index(service)]),
        )
        fig.add_trace(trace)

    # Title, axis labels and theme
    fig.update_layout(
        title=f'{granularity} Ridership by Transportation Type',
        xaxis_title=None,
        yaxis_title="Average Ridership (Thousands)",
        template="plotly_white",
    )

    # Light grid on both axes
    fig.update_xaxes(showgrid=True, gridcolor="lightgrey")
    fig.update_yaxes(showgrid=True, gridcolor="lightgrey")

    return fig
   
  
def find_free_port(start_port=8700, max_port=8800):
    """
    Return the first port in the range that can be bound on 127.0.0.1.

    Args:
    start_port: first port to try. Default: 8700
    max_port: end of the range; this port itself is not tried. Default: 8800

    Returns:
    port: The first port in the range that could be bound

    Raises:
    RuntimeError: If no free ports are available in the specified range.
    """
    for candidate in range(start_port, max_port):
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(("127.0.0.1", candidate))  # Succeeds only if the port is free
        except OSError:
            continue  # Port is in use, try the next one
        else:
            return candidate
        finally:
            probe.close()  # Always release the probe socket
    raise RuntimeError("No free ports available in the specified range.")

@app.callback(
    Output('card_row', 'children'),  # Update the row with new cards
    [Input('selected_services_store', 'data'),  # Use the stored value as input
    Input('granular_data_json_store', 'data'),
    Input('granularity_store','data'),
    Input('metrics_store','data'),],
)
def update_cards(selected_services,granular_data_json, granularity, metrics_json):
    """
    Rebuild the summary cards from the values held in the dcc.Store components.

    Args:
        selected_services: Stored list of service names; falls back to every
            entry of `services` when empty or None.
        granular_data_json: Resampled dataframe serialised with orient='split'.
        granularity: Stored granularity label.
        metrics_json: JSON string of the per-service metrics.

    Returns:
        The list of card columns, or an empty list while the stores hold no data.
    """
    # Check for missing data before decoding anything: json.loads(None)
    # raises TypeError, so the guard has to come first.
    if granular_data_json is None or metrics_json is None:
        print("No granular data available.")
        return []  # Return an empty list if no data is available

    metrics = json.loads(metrics_json)
    granular_data = pd.read_json(StringIO(granular_data_json), orient='split')

    if not selected_services:
        selected_services = services  # Default values
    return create_cards(granular_data, selected_services, granularity, metrics)


def create_metrics(granular_data: pd.DataFrame, selected_services: list) -> dict:
    """
    Compute the headline figures shown on each service card.

    Args:
        granular_data: Resampled dataframe with one column per service and at
            least one row.
        selected_services: Names of the service columns to summarise.

    Returns:
        A dict keyed by service name. Each value holds:
            "ridership_last_period": the last value formatted like "1,234K"
            "percent_change": change from the previous period to the last one,
                in percent, rounded to two decimals. It is 0.0 when there is
                no previous period or the previous value is zero, because the
                change is undefined in those cases.
    """
    metrics = {}
    for service in selected_services:
        series = granular_data[service]
        last_period_value = series.iloc[-1]
        ridership_last_period = '{:,}K'.format(int(last_period_value))

        percent_change = 0.0
        if len(series) >= 2:
            previous_period_value = series.iloc[-2]
            # Dividing by a zero baseline would give inf/NaN, so keep 0.0 then.
            if previous_period_value != 0:
                # float(...) keeps the value JSON-serialisable whatever the numpy dtype.
                percent_change = float(
                    round(((last_period_value - previous_period_value) / previous_period_value) * 100, 2)
                )

        # Store metrics in the dictionary
        metrics[service] = {
            "ridership_last_period": ridership_last_period,
            "percent_change": percent_change
        }
    return metrics

    
@app.callback(
    [Output('report_title', 'children'),
     Output('service_line_chart', 'figure'),
     Output('selected_services_store', 'data'),
     Output('granular_data_json_store', 'data'),
     Output('granularity_store', 'data'),
     Output('metrics_store', 'data')],
    # Only the two dropdowns drive this callback. 'selected_services_store' is
    # written here, so it is not listed as an Input of the same callback.
    [Input('granularity_dropdown', 'value'),
     Input('services_dropdown', 'value')],
    prevent_initial_call='initial_duplicate'
)
def display_information(granularity_dropdown_value, service_dropdown_value, selected_services=None):
    """
    Recompute everything that depends on the two dropdowns.

    Args:
        granularity_dropdown_value: Value of the granularity dropdown.
        service_dropdown_value: Value of the services dropdown: a list of
            service names, the string 'all_services', or an empty value.
        selected_services: Ignored. Kept only so existing three-argument calls
            still work; the selection is always derived from
            `service_dropdown_value`.

    Returns:
        A tuple matching the six Outputs: the title text, the line chart
        figure, the list of selected services, the resampled data as JSON
        (orient='split'), the granularity, and the metrics as a JSON string.
    """
    title = "MTA Ridership Dashboard"

    # 'all_services' and an empty selection both mean "show every service".
    selected_services = (
        services if service_dropdown_value == 'all_services' or not service_dropdown_value
        else service_dropdown_value
    )

    mta_thousands = create_thousand_dataframe(mta_data)
    granular_data = resample_data(mta_thousands, granularity_dropdown_value)

    # Convert DataFrame to JSON
    granular_data_json = granular_data.to_json(orient='split')

    service_fig = create_service_line_chart(granular_data, granularity_dropdown_value, selected_services)

    # Create JSON metrics
    metrics_json = json.dumps(create_metrics(granular_data, selected_services))

    return title, service_fig, selected_services, granular_data_json, granularity_dropdown_value, metrics_json
    

if __name__ == "__main__":
    # Probe the default port range and report which port was picked.
    free_port = find_free_port()
    print('Port used =', free_port)
    app.run_server(port=free_port, mode="inline", debug=True) # Used for deployment in Jupyter Notebook
    #app.run_server(debug=True) # Used for deployment to Docker


Port used = 8706
