In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly as pys
import ipywidgets as widgets
from ipywidgets import Layout

import _global_scripts as gs

## Purpose

**Investigation #6**: Average Drive Access Distance by Station (Model vs Observed) 

## Inputs

In [16]:
# Link table used for the TDM summary below.
df_tdm_links = pd.read_csv(
    filepath_or_buffer=r"_data/base_link.csv",
)

# Observed (OBS) records and the per-record access/egress distances that are
# later joined to them on 'id'.
df_obs_ungrouped = pd.read_csv(
    filepath_or_buffer=r"_data/base_observed_ungrouped_pa.csv",
)
df_obs_access_dist = pd.read_csv(
    filepath_or_buffer=r"_data/E2.6/OBS_Access_Egress_Distances_v2.csv",
)

# NOTE(review): this cell emits a pandas mixed-type warning for column index 3
# of one of the files above; pin that column's dtype once the file is identified.
# NOTE: an earlier version also read the processed 2019 OBS export
# ("2019 Final Weighted UTA OD Data - 2022-04-05 - processed.csv") from an
# absolute local path; that read is disabled and nothing here depends on it.


Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.



In [3]:
def weighted_quantile(data, q, weights, factor):
    """Quantile of ``data`` in which each value counts in proportion to its weight.

    Each weight is multiplied by ``factor`` and rounded to a whole number; the
    matching value is repeated that many times and the ordinary ``np.quantile``
    of the expanded sample is returned. A larger ``factor`` keeps more of the
    fractional part of the weights, at the cost of a larger temporary array.

    Parameters
    ----------
    data : array-like of numbers
        Observed values.
    q : float or array-like of float
        Quantile(s) to compute, each in [0, 1].
    weights : array-like of numbers
        One non-negative weight per entry of ``data``.
    factor : number
        Multiplier applied to ``weights`` before rounding.

    Returns
    -------
    float or ndarray
        The requested quantile(s). NaN (same shape as ``q``) when no value
        ends up with a positive rounded weight, e.g. empty input or weights
        that all round to zero.

    Raises
    ------
    ValueError
        If a weight is NaN/infinite or rounds to a negative count.
    """
    values = np.asarray(data, dtype=float)
    scaled_weights = np.asarray(weights, dtype=float) * factor

    # Casting NaN/inf to int is undefined, so reject them explicitly.
    if not np.all(np.isfinite(scaled_weights)):
        raise ValueError("weights must be finite")

    repeat_counts = np.round(scaled_weights).astype(int)
    if np.any(repeat_counts < 0):
        raise ValueError("weights must be non-negative")

    # Nothing to expand: np.quantile on an empty sample errors or returns NaN
    # depending on the NumPy version, so return NaN deterministically.
    if repeat_counts.sum() == 0:
        return np.nan if np.ndim(q) == 0 else np.full(np.shape(q), np.nan)

    expanded = np.repeat(values, repeat_counts)
    return np.quantile(expanded, q)

def weighted_average(data, weights):
    """Weighted arithmetic mean of ``data``.

    Parameters
    ----------
    data : array-like of numbers
        Observed values.
    weights : array-like of numbers
        One weight per entry of ``data``.

    Returns
    -------
    float
        ``sum(data * weights) / sum(weights)``, or NaN when the weights sum to
        zero (including empty input), so that one weightless group does not
        abort a whole group-by summary.
    """
    values = np.asarray(data, dtype=float)
    w = np.asarray(weights, dtype=float)

    # np.average raises ZeroDivisionError when the weights sum to zero.
    if w.sum() == 0:
        return np.nan

    return np.average(values, weights=w)

## TDM

In [4]:
# Station lookup: keep only the station name and the 'N' column, which is
# matched against link end nodes (A / B) further down.
df_stations1 = gs.df_stations.loc[:, ['station', 'N']]

In [5]:
# Attach station names to every link: 'brd_station' when the link's B node is a
# station node, 'alt_station' when its A node is one.
df_tdm_1 = (
    pd.merge(df_tdm_links, df_stations1, left_on="B", right_on="N", how="left")
    .rename(columns={"station": "brd_station"})
    .merge(df_stations1, left_on="A", right_on="N", how="left")
    .rename(columns={"station": "alt_station"})
    # both merges bring in an 'N' column, which pandas suffixes as N_x / N_y
    .drop(columns=['N_x', 'N_y'])
)

# Keep links with a station on one end and a node numbered below 10000 on the other.
# NOTE(review): presumably nodes below 10000 are zone centroids - confirm.
df_tdm_2 = df_tdm_1[
    ((df_tdm_1["brd_station"].notna()) & (df_tdm_1["A"] < 10000)) |
    ((df_tdm_1["alt_station"].notna()) & (df_tdm_1["B"] < 10000))
]
# NOTE(review): Mode codes 80 and 11 are taken to be the access links of interest - confirm.
# .copy() so the column assignment below writes to an independent frame rather
# than to a slice of df_tdm_1 (avoids SettingWithCopyWarning).
df_tdm_2 = df_tdm_2[df_tdm_2["Mode"].isin([80, 11])].copy()
df_tdm_2["FromSkim_CRT"] = pd.to_numeric(df_tdm_2["FromSkim_CRT"], errors="coerce")

# Select the columns needed for the summary (no rows are filtered here).
df_tdm_3 = df_tdm_2[['Mode','Period','AccessMode','brd_station','alt_station','Riders','Distance']]


def _tdm_access_summary(grp):
    """Rider-weighted access-distance statistics for one (Mode, station, AccessMode) group."""
    riders = grp['Riders'].values
    dist = grp['Distance'].values
    return pd.Series({
        'Brd_PA': grp['Riders'].sum(),
        # riders are scaled by 10000 before rounding so fractional riders still count
        'Dist_15': weighted_quantile(dist, 0.15, riders, 10000),
        'Dist_Med': weighted_quantile(dist, 0.5, riders, 10000),
        'Dist_85': weighted_quantile(dist, 0.85, riders, 10000),
        'Dist_Avg': weighted_average(dist, riders),
    })


# Summarize at the boarding-station level. The value columns are selected
# explicitly after groupby so that apply() does not operate on the grouping
# columns (deprecated in recent pandas); reset_index() turns the group keys
# back into ordinary columns.
df_tdm_brd = (
    df_tdm_3
    .dropna(subset=["brd_station"])
    .groupby(['Mode', 'brd_station', 'AccessMode'])[['Riders', 'Distance']]
    .apply(_tdm_access_summary)
    .reset_index()
)

df_tdm_brd['Source'] = 'TDM'
df_tdm_brd['station'] = df_tdm_brd['brd_station']
df_tdm_brd['Notes'] = 'Boardings represent only direct boardings to CRT.'
df_tdm_brd = df_tdm_brd[['Source','station','AccessMode','Brd_PA','Dist_15','Dist_Med','Dist_85','Dist_Avg','Notes']]

df_tdm_brd

  df_tdm_3


Unnamed: 0,Source,station,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,TDM,01-PROVO CENTRAL STATION,walk,421.53,0.24,0.56,0.56,0.425441,Boardings represent only direct boardings to CRT.
1,TDM,02-OREM CENTRAL STATION,walk,463.64,0.08,0.08,0.58,0.256846,Boardings represent only direct boardings to CRT.
2,TDM,03-AMERICAN FORK STATION,walk,81.71,0.42,0.42,0.77,0.55925,Boardings represent only direct boardings to CRT.
3,TDM,04-LEHI STATION,walk,221.96,0.25,0.58,0.66,0.474284,Boardings represent only direct boardings to CRT.
4,TDM,05-DRAPER STATION,walk,151.75,0.4,0.41,0.47,0.437257,Boardings represent only direct boardings to CRT.
5,TDM,06-SOUTH JORDAN STATION,walk,259.48,0.35,0.67,0.67,0.526463,Boardings represent only direct boardings to CRT.
6,TDM,07-MURRAY CENTRAL STATION,walk,541.66,0.33,0.45,0.75,0.521251,Boardings represent only direct boardings to CRT.
7,TDM,08-SALT LAKE CENTRAL STATION,walk,76.29,0.12,0.12,0.22,0.180298,Boardings represent only direct boardings to CRT.
8,TDM,09-NORTH TEMPLE STATION,walk,642.01,0.17,0.22,0.56,0.301644,Boardings represent only direct boardings to CRT.
9,TDM,10-WOODS CROSS STATION,walk,310.76,0.22,0.22,0.57,0.331028,Boardings represent only direct boardings to CRT.


## Observed

In [17]:
# Join each observed record to its access/egress distance on the shared 'id'
# key (default inner join: records without a match on either side are dropped).
df_obs_1 = pd.merge(df_obs_ungrouped, df_obs_access_dist, on='id')

In [7]:
# Observed records flagged as boardings ('onoff' == 'on').
boardings = df_obs_1.loc[df_obs_1['onoff'] == 'on']


def _obs_access_summary(grp):
    """Weighted access-distance statistics for one (station, AccessMode) group."""
    wts = grp['unlinked_weight_adj'].values
    dist = grp['AcDist'].values
    return pd.Series({
        'Brd_PA': grp['unlinked_weight_adj'].sum(),
        # weights are scaled by 10000 before rounding so fractional weights still count
        'Dist_15': weighted_quantile(dist, 0.15, wts, 10000),
        'Dist_Med': weighted_quantile(dist, 0.5, wts, 10000),
        'Dist_85': weighted_quantile(dist, 0.85, wts, 10000),
        'Dist_Avg': weighted_average(dist, wts),
    })


# The value columns are selected explicitly after groupby so that apply() does
# not operate on the grouping columns (deprecated in recent pandas).
boardings_total = (
    boardings
    .groupby(['station', 'AccessMode'])[['unlinked_weight_adj', 'AcDist']]
    .apply(_obs_access_summary)
    .reset_index()
)

boardings_total['Source'] = 'OBS'
boardings_total['Notes'] = 'Boardings represent all boardings, not just direct to CRT.'
boardings_total = boardings_total[['Source','station','AccessMode','Brd_PA','Dist_15','Dist_Med','Dist_85','Dist_Avg', 'Notes']]
boardings_total

  boardings_total = (boardings.groupby(['station','AccessMode'])


Unnamed: 0,Source,station,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,OBS,01-PROVO CENTRAL STATION,drive,2260.414,1.17,2.79,9.89,4.727752,"Boardings represent all boardings, not just di..."
1,OBS,01-PROVO CENTRAL STATION,walk,945.908,0.164252,0.371521,1.103047,0.545255,"Boardings represent all boardings, not just di..."
2,OBS,02-OREM CENTRAL STATION,drive,1460.04,0.95,2.85,4.83,3.319107,"Boardings represent all boardings, not just di..."
3,OBS,02-OREM CENTRAL STATION,walk,360.64,0.076044,0.310929,0.605386,0.4007,"Boardings represent all boardings, not just di..."
4,OBS,03-AMERICAN FORK STATION,drive,1410.593,2.09,3.98,9.58,5.402691,"Boardings represent all boardings, not just di..."
5,OBS,03-AMERICAN FORK STATION,walk,136.077,0.282029,0.824947,0.92142,0.839018,"Boardings represent all boardings, not just di..."
6,OBS,04-LEHI STATION,drive,1128.372,2.35,4.27,8.48,5.39814,"Boardings represent all boardings, not just di..."
7,OBS,04-LEHI STATION,walk,292.189,0.27289,0.351125,1.679277,0.896353,"Boardings represent all boardings, not just di..."
8,OBS,05-DRAPER STATION,drive,935.648,2.838472,4.54,8.32,5.075091,"Boardings represent all boardings, not just di..."
9,OBS,05-DRAPER STATION,walk,144.648,0.469682,1.03481,1.613032,1.072952,"Boardings represent all boardings, not just di..."


## Comparison of TDM and OBS

In [8]:
# Stack the observed and TDM station summaries into one long table; the
# 'Source' column tells the two apart.
sumStats = pd.concat(
    objs=[boardings_total, df_tdm_brd],
    ignore_index=True,
)

In [9]:
def plotit(variable, access_mode):
    """Plot one distance statistic by station, with OBS and TDM side by side.

    Intended as the callback of ``widgets.interactive_output``. Relies on the
    notebook globals ``sumStats`` (combined summary table), ``output`` (an
    Output widget that is cleared on every call) and ``firstTime``.

    Parameters
    ----------
    variable : str
        Column of ``sumStats`` to plot on the y axis (e.g. ``'Dist_Med'``).
    access_mode : str
        Value of ``sumStats['AccessMode']`` to keep (e.g. ``'drive'``).

    Notes
    -----
    While ``firstTime`` is False the call only flips it to True and draws
    nothing, so the chart first appears after a widget value changes.
    """
    global firstTime

    output.clear_output()  # Clear previous output before displaying new content

    if not firstTime:
        # Initial call made when the interactive output is created: arm the
        # flag and skip drawing.
        firstTime = True
        return

    filtered_data = sumStats[sumStats['AccessMode'] == access_mode]

    # px.histogram sums `variable` over all rows sharing a station and Source,
    # so this reads as a plain grouped bar chart only when there is one row per
    # station / Source for the chosen access mode.
    fig = px.histogram(
        filtered_data,
        x="station",
        y=variable,
        # fixed-point labels: the SI format '.2s' renders 0.56 as "560m"
        text_auto='.2f',
        color='Source',
        barmode='group',
        height=400
    )
    fig.update_layout(
        xaxis_title="Station Name",
        yaxis_title=str(variable),
        # the colour encodes the data source (OBS vs TDM)
        legend_title="Source"
    )

    # Display the plot
    fig.show()

In [10]:
# Statistics and access modes offered by the selectors
lstValues = ['Dist_15', 'Dist_Med', 'Dist_85', 'Dist_Avg']
accessModeOptions = ['drive', 'walk']

selectValues = widgets.Select(
    options=lstValues,
    value='Dist_Med',
    description='Select Variable',
)
selectAccessMode = widgets.Dropdown(
    options=accessModeOptions,
    value='drive',
    description='Access Mode',
)

# Global flag read by plotit(): while False, plotit() only flips it to True
# and skips drawing.
firstTime = False

# Output widget that plotit() clears on every call
output = widgets.Output()
hbox = widgets.HBox([selectValues, selectAccessMode])

# Re-run plotit() whenever either selector changes
interactive_output = widgets.interactive_output(
    plotit,
    {'variable': selectValues, 'access_mode': selectAccessMode},
)

display(hbox, interactive_output, output)

HBox(children=(Select(description='Select Variable', index=1, options=('Dist_15', 'Dist_Med', 'Dist_85', 'Dist…

Output()

Output()

## Conclusions

- The end-of-line stations (Provo and Ogden) show lower drive access distances in the model than observed.
    - In addition, the stations that immediately follow (Orem and Roy) show higher drive access distances in the model than observed.
    - This leads us to believe that the model is forecasting trips that possibly "bypass" the Provo and Ogden stations, preferring instead to go one more station closer.