In [19]:
import os
import pandas as pd
import plotly
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly as pys
import ipywidgets as widgets
from ipywidgets import Layout

import _global_scripts as gs

## Purpose

Compare commuter rail station boardings from the observed data against every model flagged for display.

## Inputs

In [20]:
# Select the analysis type: 'PA' or 'OD'
pa_od = 'PA'
# Lower-case form of the same switch; it is used in the observed-data file name
# and passed to gs.summarize_tdm_stats. Derived from pa_od so the two settings
# cannot drift apart when the analysis type is changed.
pa_od_function = pa_od.lower()

In [None]:
import pandas as pd

# Load the observed summary for the selected analysis type, ordered by source and station
df_tdm_obs = (
    pd.read_csv(f"data-obs/base_observed_summary_{pa_od_function}.csv", low_memory=False)
    .sort_values(by=['Source', 'station'], ascending=True)
    .reset_index(drop=True)
)

# For the PA analysis the Alt_* columns are removed before combining with model data.
# Assigning the result (instead of inplace=True) keeps the cell safe to re-run.
if pa_od == 'PA':
    df_tdm_obs = df_tdm_obs.drop(columns=['Alt_PA', 'Alt_Direct_PA', 'Alt_Transfer_PA'])

# Read the model list from csv
models_df = pd.read_csv("data-scenario/models.csv")

# Keep only models flagged for display that also have a non-null model_RID
summary_models = models_df[
    models_df["model_display"] & models_df["model_RID"].notna()
]

# Final copied filenames (no RUNID); identical for every model, so defined once
node_filename = "transit_brding_summary_node.csv"
link_filename = "transit_rider_summary_link.csv"

# model_id -> (node summary path, link summary path)
summary_paths = {}

for _, row in summary_models.iterrows():
    model_id = row["model_id"]
    model_dir = f"data-scenario/{row['scenario_id']}/{row['model_name']}"
    summary_paths[model_id] = (
        f"{model_dir}/{node_filename}",
        f"{model_dir}/{link_filename}",
    )

# Read stations data
df_stations1 = gs.df_stations[['station', 'N', 'County']]

# Summarize TDM stats for each dataset, starting with the observed data
df_tdm_list = [df_tdm_obs]

for label, (boarding_path, rider_path) in summary_paths.items():
    df_tdm_list.append(gs.summarize_tdm_stats(boarding_path, rider_path, df_stations1, label, pa_od_function))

# Stack observed and model summaries into one frame
df_tdm_obs_new = pd.concat(df_tdm_list, ignore_index=True)

# Round, fill gaps with 0, and remove helper columns
df_tdm_obs_new = (
    df_tdm_obs_new
    .round(3)
    .fillna(0)
    .drop(columns='index', errors='ignore')
    # The mask must be built from the frame at this point in the chain: a mask taken
    # from the pre-drop frame is one element too long whenever an 'index' column was
    # actually dropped, and .loc then raises "Boolean index has wrong length".
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)

# Keep a copy under the name used by the following cells, then display
sumStats = df_tdm_obs_new.copy()
display(df_tdm_obs_new)


In [None]:
# Add share columns: each row's boardings divided by the total for its Source and AccessMode

# Boarding measures that are summed and then turned into shares
brd_cols = [f"Brd_{pa_od}", f"Brd_Direct_{pa_od}", f"Brd_Transfer_{pa_od}"]

# Per-station totals for each Source, appended as an extra "All" access mode
station_sum = sumStats.groupby(["Source", "station"], as_index=False)[brd_cols].sum()
station_sum["AccessMode"] = "All"
sumStats2 = pd.concat([sumStats, station_sum], ignore_index=True)

# Totals per Source and AccessMode; renamed with a Source_ prefix so they can be
# merged back onto every row as the denominators
accessmode_sum = (
    sumStats2
    .groupby(["Source", "AccessMode"], as_index=False)[brd_cols]
    .sum()
    .rename(columns={col: f"Source_{col}" for col in brd_cols})
)
sumStatsP = sumStats2.merge(accessmode_sum, on=["Source", "AccessMode"], how="left")

# A zero denominator yields NaN in the corresponding share column
for col in brd_cols:
    sumStatsP[f"{col}_Perc"] = sumStatsP[col] / sumStatsP[f"Source_{col}"]
sumStatsP


Unnamed: 0,Source,station,AccessMode,Brd_PA,Brd_Direct_PA,Brd_Transfer_PA,Source_Brd_PA,Source_Brd_Direct_PA,Source_Brd_Transfer_PA,Brd_PA_Perc,Brd_Direct_PA_Perc,Brd_Transfer_PA_Perc
0,OBS,01-OGDEN STATION,drive,1452.874,0.00,0.00,14955.118,0.00,0.00,0.097149,,
1,OBS,01-OGDEN STATION,walk,495.237,0.00,0.00,5672.892,0.00,0.00,0.087299,,
2,OBS,02-ROY STATION,drive,699.224,0.00,0.00,14955.118,0.00,0.00,0.046755,,
3,OBS,02-ROY STATION,walk,168.575,0.00,0.00,5672.892,0.00,0.00,0.029716,,
4,OBS,03-CLEARFIELD STATION,drive,745.345,0.00,0.00,14955.118,0.00,0.00,0.049839,,
...,...,...,...,...,...,...,...,...,...,...,...,...
535,v910-official__RTP_2050,11-DRAPER STATION,All,3885.870,3302.74,583.13,75187.770,55696.75,19491.02,0.051682,0.059299,0.029918
536,v910-official__RTP_2050,12-LEHI STATION,All,11092.050,9133.50,1958.55,75187.770,55696.75,19491.02,0.147525,0.163986,0.100485
537,v910-official__RTP_2050,13-AMERICAN FORK STATION,All,5015.690,4951.48,64.21,75187.770,55696.75,19491.02,0.066709,0.088901,0.003294
538,v910-official__RTP_2050,14-OREM CENTRAL STATION,All,2864.850,2025.09,839.76,75187.770,55696.75,19491.02,0.038103,0.036359,0.043084


In [None]:
def plotit(variable, access_mode):
    """Plot `variable` per station as grouped bars, one colour per Source.

    While the module-level flag `firstTime` is false, the call only sets it to
    True and draws nothing; later calls draw the chart.

    Parameters
    ----------
    variable : name of the sumStatsP column shown on the y axis.
    access_mode : value of sumStatsP['AccessMode'] used to filter the rows.
    """
    global firstTime
    output.clear_output()  # Clear previous output before displaying new content

    # Guard: the first call only flips the flag
    if not firstTime:
        firstTime = True
        return

    rows_for_mode = sumStatsP.loc[sumStatsP['AccessMode'] == access_mode]

    # Grouped bars by station, coloured by Source
    bar_chart = px.histogram(
        rows_for_mode,
        x="station",
        y=variable,
        color='Source',
        barmode='group',
        text_auto='.2s',
        height=400,
    )
    bar_chart.update_layout(
        xaxis_title="Station Name",
        yaxis_title=str(variable),
        legend_title="Model Version",
    )

    # Display the plot
    bar_chart.show()

In [None]:
# Columns offered for plotting: the boarding measures and their share counterparts
lstValues = [
    f'Brd_{pa_od}',
    f'Brd_Direct_{pa_od}',
    f'Brd_Transfer_{pa_od}',
    f'Brd_{pa_od}_Perc',
    f'Brd_Direct_{pa_od}_Perc',
    f'Brd_Transfer_{pa_od}_Perc',
]
accessModeOptions = ['drive', 'walk', 'All']

# Global flag read by plotit; while it is False, plotit only sets it to True and draws nothing
firstTime = False

# Selection controls
selectValues = widgets.Select(
    options=lstValues,
    value=f'Brd_{pa_od}',
    description='Select Variable',
)
selectAccessMode = widgets.Dropdown(
    options=accessModeOptions,
    value='All',
    description='Access Mode',
)

# Output widget cleared by plotit, and the row holding both controls
output = widgets.Output()
hbox = widgets.HBox([selectValues, selectAccessMode])

# Bind the controls to plotit
interactive_output = widgets.interactive_output(
    plotit,
    {'variable': selectValues, 'access_mode': selectAccessMode},
)

display(hbox, interactive_output, output)

HBox(children=(Select(description='Select Variable', options=('Brd_PA', 'Brd_Direct_PA', 'Brd_Transfer_PA', 'B…

Output()

Output()

In [None]:
# Write the boardings stats to csv with the Source column renamed to model_id
boardings_export = sumStatsP.rename(columns={'Source': 'model_id'})
boardings_export.to_csv("crt-improvements-app/data/boardings-stats.csv", index=False)
print("Exported boardings-stats.csv")

Exported boardings-stats.csv


In [None]:
def plotitbycounty(variable, access_mode):
    """Plot `variable` summed by county as grouped bars, one colour per Source.

    While the module-level flag `firstTime` is false, the call only sets it to
    True and draws nothing; later calls draw the chart.

    Parameters
    ----------
    variable : name of the sumStatsP column to sum per county and show on the y axis.
    access_mode : value of sumStatsP['AccessMode'] used to filter the rows.
    """
    output2.clear_output()  # Clear previous output before displaying new content
    global firstTime
    if firstTime:

        filtered_data = sumStatsP[sumStatsP['AccessMode'] == access_mode]

        # One row per station/County pair, so a station listed more than once in
        # df_stations1 is not counted repeatedly after the merge.
        # NOTE(review): assumes each station maps to a single County -- confirm.
        county_lookup = df_stations1[['station', 'County']].drop_duplicates()
        filtered_data = pd.merge(filtered_data, county_lookup, on='station', how='left')

        # Aggregate the column that was actually selected. The previous hard-coded
        # 'Brd_PA' aggregation left no column for any other choice, so the plot failed.
        # Share columns can be summed too: their denominator is the same for every
        # row of a given Source and AccessMode.
        filtered_data = filtered_data.groupby(['County', 'Source'], as_index=False)[variable].sum()

        # Create histogram
        fig = px.histogram(
            filtered_data,
            x="County",
            y=variable,
            text_auto='.2s',
            color='Source',
            barmode='group',
            height=400
        )
        fig.update_layout(
            xaxis_title="County",
            yaxis_title=str(variable),
            legend_title="Model Version"
        )

        # Display the plot
        fig.show()

    else:
        firstTime = True

In [None]:
# Global flag read by plotitbycounty; while it is False, the callback only sets it to True
firstTime = False

# Selection controls for the county chart
selectValues2 = widgets.Select(
    options=lstValues,
    value=f'Brd_{pa_od}',
    description='Select Variable',
)
selectAccessMode2 = widgets.Dropdown(
    options=accessModeOptions,
    value='All',
    description='Access Mode',
)

# Output widget cleared by plotitbycounty, and the row holding both controls
output2 = widgets.Output()
hbox2 = widgets.HBox([selectValues2, selectAccessMode2])

# Bind the controls to plotitbycounty
interactive_output2 = widgets.interactive_output(
    plotitbycounty,
    {'variable': selectValues2, 'access_mode': selectAccessMode2},
)

display(hbox2, interactive_output2, output2)

HBox(children=(Select(description='Select Variable', options=('Brd_PA', 'Brd_Direct_PA', 'Brd_Transfer_PA', 'B…

Output()

Output()