In [1]:
import os
import pandas as pd
import plotly
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly as pys
import ipywidgets as widgets
from ipywidgets import Layout

import _global_scripts as gs

## Purpose

Increase RUNFACTOR for the drive-to-CRT drive access modes for BY_2019, calculate the trip length distribution for each run, and then compare the results with observed data.

 - **2.12.1**  RUNFACTOR = 2.0
 - **2.12.2**  RUNFACTOR = 2.5
 - **2.12.3**  RUNFACTOR = 3.0


## Inputs

In [2]:
# Analysis basis for the whole notebook: 'PA' or 'OD'.
# pa_od is spliced into the boarding column names (e.g. Brd_<pa_od>), while
# pa_od_function is the lowercase form used in the observed-summary file name
# and handed to the _global_scripts helpers. Keep the two in sync.
pa_od = "PA"
pa_od_function = "pa"

In [3]:
import pandas as pd

# Load the observed boarding summary for the selected PA/OD basis, ordered by
# Source then station.
df_tdm_obs = (
    pd.read_csv(f"_data/base_observed_summary_{pa_od_function}.csv", low_memory=False)
    .sort_values(by=['Source', 'station'], ascending=True)
    .reset_index(drop=True)
)

# The Alt_* columns are not used by the boarding comparison in this notebook.
# Reassign instead of mutating in place so the lineage of df_tdm_obs stays explicit.
if pa_od == 'PA':
    df_tdm_obs = df_tdm_obs.drop(columns=['Alt_PA', 'Alt_Direct_PA', 'Alt_Transfer_PA'])

# Scenario label -> (boarding summary by node, rider summary by link).
# The label is passed to gs.summarize_tdm_stats for each scenario.
summary_paths = {
#   "TDM-Recalib":                         ("_data/E2.14/WFv920-E2.14.1_BY_2019_transit_brding_summary_node.csv",
#                                           "_data/E2.14/WFv920-E2.14.1_BY_2019_transit_rider_summary_link.csv"),
#   "TDM-Recalib-100-divisor-removed":     ("_data/E2.14/WFv920-E2.14.2_BY_2019_transit_brding_summary_node.csv",
#                                           "_data/E2.14/WFv920-E2.14.2_BY_2019_transit_rider_summary_link.csv"),
    "TDM-Recalib":                          ("_data/E2.14/WFv920-E2.14.3_BY_2019_transit_brding_summary_node.csv", # with 2nd part removed
                                            "_data/E2.14/WFv920-E2.14.3_BY_2019_transit_rider_summary_link.csv"),
    "TDM_2.7.3":                           ("_data/E2.7/WFv920-E2.7.3_BY_2019_transit_brding_summary_node.csv",
                                            "_data/E2.7/WFv920-E2.7.3_BY_2019_transit_rider_summary_link.csv"),
    "TDM_ASC_All":                         ("_data/E2.13/WFv920-E2.13.2_BY_2019_transit_brding_summary_node.csv",
                                            "_data/E2.13/WFv920-E2.13.2_BY_2019_transit_rider_summary_link.csv"),
    "TDM_ASC_Purp":                        ("_data/E2.13/WFv920-E2.13.3_BY_2019_transit_brding_summary_node.csv",
                                            "_data/E2.13/WFv920-E2.13.3_BY_2019_transit_rider_summary_link.csv"),
    "TDM_ASC_Purp_Period":                 ("_data/E2.13/WFv920-E2.13.4_BY_2019_transit_brding_summary_node.csv",
                                            "_data/E2.13/WFv920-E2.13.4_BY_2019_transit_rider_summary_link.csv"),
    "TDM_ASC_Purp_Period_ModIvtEq":        ("_data/E2.13/WFv920-E2.13.5_BY_2019_transit_brding_summary_node.csv",
                                            "_data/E2.13/WFv920-E2.13.5_BY_2019_transit_rider_summary_link.csv")
}

# Station name / node number lookup handed to the summary helper.
df_stations1 = gs.df_stations[['station', 'N']]

# Observed rows first, then one summary per model scenario.
# NOTE(review): gs.summarize_tdm_stats is expected to return a frame that can be
# stacked under the observed rows -- confirm against _global_scripts.
df_tdm_list = [df_tdm_obs]
for label, (boarding_path, rider_path) in summary_paths.items():
    df_tdm_list.append(gs.summarize_tdm_stats(boarding_path, rider_path, df_stations1, label, pa_od_function))

# Stack everything and tidy up. The 'Unnamed' filter uses a callable so the mask
# is built from the frame as it exists at that point in the chain; a mask taken
# from the pre-chain frame has the wrong length whenever the 'index' column was
# actually dropped one step earlier.
df_tdm_obs_new = (
    pd.concat(df_tdm_list, ignore_index=True)
    .round(3)
    .fillna(0)
    .drop(columns='index', errors='ignore')
    .loc[:, lambda frame: ~frame.columns.str.contains('^Unnamed')]
)

# Working copy used by the downstream cells.
sumStats = df_tdm_obs_new.copy()
display(df_tdm_obs_new)


Unnamed: 0,Source,station,AccessMode,Brd_PA,Brd_Direct_PA,Brd_Transfer_PA
0,OBS,01-PROVO CENTRAL STATION,drive,2283.379,0.00,0.00
1,OBS,01-PROVO CENTRAL STATION,walk,945.908,0.00,0.00
2,OBS,02-OREM CENTRAL STATION,drive,1475.024,0.00,0.00
3,OBS,02-OREM CENTRAL STATION,walk,371.804,0.00,0.00
4,OBS,03-AMERICAN FORK STATION,drive,1423.768,0.00,0.00
...,...,...,...,...,...,...
235,TDM_ASC_Purp_Period_ModIvtEq,11-FARMINGTON STATION,walk,195.450,134.93,60.52
236,TDM_ASC_Purp_Period_ModIvtEq,12-LAYTON STATION,walk,363.790,139.99,223.80
237,TDM_ASC_Purp_Period_ModIvtEq,13-CLEARFIELD STATION,walk,636.360,499.70,136.66
238,TDM_ASC_Purp_Period_ModIvtEq,14-ROY STATION,walk,401.810,362.55,39.26


In [4]:
# Express every boarding figure as a share of its Source / AccessMode total.

# The three boarding measures handled identically below.
brd_cols = [f"Brd_{pa_od}", f"Brd_Direct_{pa_od}", f"Brd_Transfer_{pa_od}"]
sum_spec = {col: "sum" for col in brd_cols}

# Station totals across access modes, tagged with the synthetic mode "All".
station_sum = (
    sumStats
    .groupby(["Source", "station"], as_index=False)
    .agg(sum_spec)
    .assign(AccessMode="All")
)
sumStats2 = pd.concat([sumStats, station_sum], ignore_index=True)

# Denominators: totals per Source and AccessMode, renamed with a "Source_" prefix.
accessmode_sum = (
    sumStats2
    .groupby(["Source", "AccessMode"], as_index=False)
    .agg(sum_spec)
    .rename(columns={col: f"Source_{col}" for col in brd_cols})
)

# Attach the denominators to every row, then divide.
# A zero denominator with a zero numerator yields NaN for that share.
sumStatsP = sumStats2.merge(accessmode_sum, on=["Source", "AccessMode"], how="left")
for col in brd_cols:
    sumStatsP[f"{col}_Perc"] = sumStatsP[col] / sumStatsP[f"Source_{col}"]
sumStatsP


Unnamed: 0,Source,station,AccessMode,Brd_PA,Brd_Direct_PA,Brd_Transfer_PA,Source_Brd_PA,Source_Brd_Direct_PA,Source_Brd_Transfer_PA,Brd_PA_Perc,Brd_Direct_PA_Perc,Brd_Transfer_PA_Perc
0,OBS,01-PROVO CENTRAL STATION,drive,2283.379,0.00,0.00,14955.118,0.00,0.00,0.152682,,
1,OBS,01-PROVO CENTRAL STATION,walk,945.908,0.00,0.00,5672.892,0.00,0.00,0.166742,,
2,OBS,02-OREM CENTRAL STATION,drive,1475.024,0.00,0.00,14955.118,0.00,0.00,0.098630,,
3,OBS,02-OREM CENTRAL STATION,walk,371.804,0.00,0.00,5672.892,0.00,0.00,0.065540,,
4,OBS,03-AMERICAN FORK STATION,drive,1423.768,0.00,0.00,14955.118,0.00,0.00,0.095203,,
...,...,...,...,...,...,...,...,...,...,...,...,...
355,TDM_ASC_Purp_Period_ModIvtEq,11-FARMINGTON STATION,All,659.220,598.74,60.48,19932.920,16225.07,3707.85,0.033072,0.036902,0.016311
356,TDM_ASC_Purp_Period_ModIvtEq,12-LAYTON STATION,All,1556.700,1332.93,223.77,19932.920,16225.07,3707.85,0.078097,0.082152,0.060350
357,TDM_ASC_Purp_Period_ModIvtEq,13-CLEARFIELD STATION,All,1702.460,1565.85,136.61,19932.920,16225.07,3707.85,0.085409,0.096508,0.036843
358,TDM_ASC_Purp_Period_ModIvtEq,14-ROY STATION,All,1409.430,1370.20,39.23,19932.920,16225.07,3707.85,0.070709,0.084450,0.010580


In [5]:
def plotit(variable, access_mode):
    """Draw a grouped bar chart of one boarding measure by station.

    Rows of the notebook-level ``sumStatsP`` frame are filtered to
    ``access_mode`` and plotted with one bar per ``Source`` at each station.

    While the notebook-level flag ``firstTime`` is False the call only flips
    it to True and draws nothing; every later call draws the chart.

    Parameters
    ----------
    variable : str
        Column of ``sumStatsP`` to plot on the y axis.
    access_mode : str
        Value of the ``AccessMode`` column to keep.
    """
    global firstTime
    output.clear_output()  # Clear previous output before displaying new content

    if not firstTime:
        # First invocation: arm the flag and skip rendering.
        firstTime = True
        return

    filtered_data = sumStatsP[sumStatsP['AccessMode'] == access_mode]

    # Share columns hold fractions; the SI-prefix format '.2s' would label
    # 0.033 as "33m", so use a percent format for them instead.
    is_share = str(variable).endswith('_Perc')

    # Create histogram
    fig = px.histogram(
        filtered_data,
        x="station",
        y=variable,
        text_auto='.1%' if is_share else '.2s',
        color='Source',
        barmode='group',
        height=400
    )
    fig.update_layout(
        xaxis_title="Station Name",
        yaxis_title=str(variable),
        legend_title="Model Version"
    )
    if is_share:
        # Keep the axis ticks in the same units as the bar labels.
        fig.update_yaxes(tickformat='.1%')

    # Display the plot
    fig.show()

In [6]:
# Columns offered in the variable selector: the three boarding measures
# followed by their share-of-source ("_Perc") counterparts.
boarding_fields = [f'Brd_{pa_od}', f'Brd_Direct_{pa_od}', f'Brd_Transfer_{pa_od}']
lstValues = boarding_fields + [f'{field}_Perc' for field in boarding_fields]
accessModeOptions = ['drive', 'walk', 'All']

# Controls: a list box for the measure and a dropdown for the access mode.
selectValues = widgets.Select(
    options=lstValues,
    value=lstValues[0],
    description='Select Variable',
)
selectAccessMode = widgets.Dropdown(
    options=accessModeOptions,
    value='All',
    description='Access Mode',
)

# Flag read and flipped by plotit: False until plotit has been called once.
firstTime = False

# Output widget that plotit clears on every call.
output = widgets.Output()
hbox = widgets.HBox([selectValues, selectAccessMode])

# Bind the controls to plotit; both names must exist before this line runs.
interactive_output = widgets.interactive_output(
    plotit,
    {'variable': selectValues, 'access_mode': selectAccessMode},
)

display(hbox, interactive_output, output)

HBox(children=(Select(description='Select Variable', options=('Brd_PA', 'Brd_Direct_PA', 'Brd_Transfer_PA', 'B…

Output()

Output()

## Trip Length Distribution

In [7]:
# Files to copy to _large-files\WF-TDM-v9x-v920-E2\E2.12

#  2_DetailedTripMatrices\AllTrips_pkok.omx
#  2_DetailedTripMatrices\AllTrips_Pk.omx
#  2_DetailedTripMatrices\AllTrips_Ok.omx
#  1a_Skims\skm_d8_Pk.omx
#  1a_Skims\skm_d8_Ok.omx
#  1a_Skims\skm_w8_Pk.omx
#  1a_Skims\skm_w8_Ok.omx

In [8]:
# Show the station-level table (boardings, source totals and shares) again
# as a reference for the aggregation that follows.
sumStatsP

Unnamed: 0,Source,station,AccessMode,Brd_PA,Brd_Direct_PA,Brd_Transfer_PA,Source_Brd_PA,Source_Brd_Direct_PA,Source_Brd_Transfer_PA,Brd_PA_Perc,Brd_Direct_PA_Perc,Brd_Transfer_PA_Perc
0,OBS,01-PROVO CENTRAL STATION,drive,2283.379,0.00,0.00,14955.118,0.00,0.00,0.152682,,
1,OBS,01-PROVO CENTRAL STATION,walk,945.908,0.00,0.00,5672.892,0.00,0.00,0.166742,,
2,OBS,02-OREM CENTRAL STATION,drive,1475.024,0.00,0.00,14955.118,0.00,0.00,0.098630,,
3,OBS,02-OREM CENTRAL STATION,walk,371.804,0.00,0.00,5672.892,0.00,0.00,0.065540,,
4,OBS,03-AMERICAN FORK STATION,drive,1423.768,0.00,0.00,14955.118,0.00,0.00,0.095203,,
...,...,...,...,...,...,...,...,...,...,...,...,...
355,TDM_ASC_Purp_Period_ModIvtEq,11-FARMINGTON STATION,All,659.220,598.74,60.48,19932.920,16225.07,3707.85,0.033072,0.036902,0.016311
356,TDM_ASC_Purp_Period_ModIvtEq,12-LAYTON STATION,All,1556.700,1332.93,223.77,19932.920,16225.07,3707.85,0.078097,0.082152,0.060350
357,TDM_ASC_Purp_Period_ModIvtEq,13-CLEARFIELD STATION,All,1702.460,1565.85,136.61,19932.920,16225.07,3707.85,0.085409,0.096508,0.036843
358,TDM_ASC_Purp_Period_ModIvtEq,14-ROY STATION,All,1409.430,1370.20,39.23,19932.920,16225.07,3707.85,0.070709,0.084450,0.010580


In [9]:
# Total boardings per Source and AccessMode.
# The column name follows the notebook-wide pa_od setting instead of a
# hard-coded 'Brd_PA', so the cell also works when the analysis is switched to OD.
brd_total_col = f"Brd_{pa_od}"
sumStatsPg = (
    sumStatsP
    .groupby(['Source', 'AccessMode'], as_index=False)
    .agg(**{brd_total_col: (brd_total_col, 'sum')})
)
sumStatsPg

Unnamed: 0,Source,AccessMode,Brd_PA
0,OBS,All,20628.01
1,OBS,drive,14955.118
2,OBS,walk,5672.892
3,TDM,All,18575.28
4,TDM,drive,12159.62
5,TDM,walk,6415.66
6,TDM-Recalib,All,19851.71
7,TDM-Recalib,drive,13409.06
8,TDM-Recalib,walk,6442.65
9,TDM_2.7.3,All,19916.84


In [10]:

# Pivot to make AccessMode columns: one row per Source, one column per access
# mode, rounded to whole boardings. The value column is derived from pa_od
# rather than hard-coded so it matches the boarding column naming used elsewhere.
pivot_df = sumStatsPg.pivot(index='Source', columns='AccessMode', values=f'Brd_{pa_od}').round(0)
pivot_df

AccessMode,All,drive,walk
Source,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
OBS,20628.0,14955.0,5673.0
TDM,18575.0,12160.0,6416.0
TDM-Recalib,19852.0,13409.0,6443.0
TDM_2.7.3,19917.0,13433.0,6484.0
TDM_ASC_All,19855.0,13421.0,6435.0
TDM_ASC_Purp,19857.0,13421.0,6436.0
TDM_ASC_Purp_Period,19863.0,13429.0,6434.0
TDM_ASC_Purp_Period_ModIvtEq,19933.0,13493.0,6440.0
