In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly as pys
import ipywidgets as widgets
from ipywidgets import Layout
import os
import _global_scripts as gs

## Purpose

**Investigation #6**: Average Drive Access Distance by Station (Model vs Observed) 

## Inputs

In [2]:
# get data from csv now
models_df = pd.read_csv("data-scenario/models.csv")

# Filter to only displayed models
summary_models = models_df[
    models_df["model_display"] & models_df["model_RID"].notna()
]

# Generate filenames and sourcenames
tdm_filenames = []
tdm_sourcenames = []

for _, row in summary_models.iterrows():
    scenario_id = row["scenario_id"]
    model_id = row["model_id"]
    model_name = row["model_name"]
    scenario_label = row["model_label"]  # e.g., "Recalib-RF2.5 - BY 2019"

    # File path matches copied filename pattern (without RUNID)
    link_path = f"data-scenario/{scenario_id}/{model_name}/transit_rider_summary_link.csv"

    # Optionally verify file exists before including
    if os.path.exists(link_path):
        tdm_filenames.append(link_path)
        tdm_sourcenames.append(f"{model_id}")
    else:
        print(f"⚠️ File not found: {link_path}")



df_obs_access_dist = pd.read_csv(r"data-obs/OBS_Access_Egress_Distances_v2.csv")
df_obs_ungrouped = pd.read_csv(r"data-obs/base_observed_ungrouped_pa.csv")
#df_obs_data = pd.read_csv(r"D:/GitHub/OBS-TDM-Ridership-Compare/2023-OBS-TDM-Compare/data/2019-OBS/2019 Final Weighted UTA OD Data - 2022-04-05 - processed.csv")7

In [3]:
def weighted_quantile(data, q, weights, factor):
    factored_weights = weights * factor
    rounded_weights = np.round(factored_weights).astype(int)
    repeat_data = np.repeat(data,rounded_weights).astype(float)

    return np.quantile(repeat_data, q)

def weighted_average(data, weights):
    return np.average(data, weights=weights)

## TDM

In [4]:
# read in stations and summarize tdm results
df_stations1 = gs.df_stations[['station','N']]

In [5]:
df_tdm_brd = pd.DataFrame()

for tdm_filename, tdm_sourcename in zip(tdm_filenames, tdm_sourcenames):
    print(tdm_sourcename)
    df_tdm_links = pd.read_csv(tdm_filename, low_memory=False)

    # Merge to get boarding and alighting station names
    df_tdm_1 = (
        df_tdm_links
        .merge(df_stations1, left_on="B", right_on="N", how="left")
        .rename(columns={"station": "brd_station"})
        .merge(df_stations1, left_on="A", right_on="N", how="left")
        .rename(columns={"station": "alt_station"})
        .drop(columns=['N_x', 'N_y'])
        .copy()
    )

    # Filter out invalid data and keep specific access links
    df_tdm_2 = df_tdm_1[
        ((df_tdm_1["brd_station"].notna()) & (df_tdm_1["A"] < 10000)) |
        ((df_tdm_1["alt_station"].notna()) & (df_tdm_1["B"] < 10000))
    ].copy()

    df_tdm_2 = df_tdm_2[df_tdm_2["Mode"].isin([80, 11])].copy()
    df_tdm_2["FromSkim_CRT"] = pd.to_numeric(df_tdm_2["FromSkim_CRT"], errors="coerce")

    # Keep only drive access links and select necessary columns
    df_tdm_3 = df_tdm_2[['Mode', 'Period', 'AccessMode', 'brd_station', 'alt_station', 'Riders', 'Distance']].copy()

    # Aggregate data at boarding station level
    df_tdm_brd_source = (
        df_tdm_3
        .dropna(subset=["brd_station"])
        .groupby(['Mode', 'brd_station', "AccessMode"], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
    )
    
    # Compute totals across all AccessModes
    df_tdm_brd_all = (
        df_tdm_3
        .dropna(subset=["brd_station"])
        .groupby(['Mode', 'brd_station'], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
        .reset_index()
    )

    # Add "All" access mode
    df_tdm_brd_all["AccessMode"] = "All"

    # Combine both grouped and "All"
    df_tdm_brd_source = pd.concat([df_tdm_brd_source, df_tdm_brd_all], ignore_index=True)

    # Add metadata columns
    df_tdm_brd_source['Source'] = tdm_sourcename
    df_tdm_brd_source['station'] = df_tdm_brd_source['brd_station']
    df_tdm_brd_source['Notes'] = 'Boardings represent only direct boardings to CRT.'

    # Reorder and select final columns
    df_tdm_brd_source = df_tdm_brd_source[
        ['Source', 'station', 'AccessMode', 'Brd_PA', 'Dist_15', 'Dist_Med', 'Dist_85', 'Dist_Avg', 'Notes']
    ]

    # Append to the main DataFrame
    df_tdm_brd = pd.concat([df_tdm_brd, df_tdm_brd_source], ignore_index=True)

df_tdm_brd


v910-official__BY_2019


  df_tdm_3
  df_tdm_3


v910-official__RTP_2050


  df_tdm_3
  df_tdm_3


E2.14.4__BY_2019


  df_tdm_3
  df_tdm_3


E2.14.4__RTP_2050


  df_tdm_3
  df_tdm_3


E2.16.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.16.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.17.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_2050


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_MedDistAdj_2050


  df_tdm_3
  df_tdm_3


E2.18.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.4__BY_2019


  df_tdm_3
  df_tdm_3


Unnamed: 0,Source,station,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,v910-official__BY_2019,01-OGDEN STATION,walk,73.18,0.28,0.44,0.83,0.469794,Boardings represent only direct boardings to CRT.
1,v910-official__BY_2019,02-ROY STATION,walk,368.09,0.37,0.45,0.68,0.521878,Boardings represent only direct boardings to CRT.
2,v910-official__BY_2019,03-CLEARFIELD STATION,walk,474.87,0.29,0.63,0.64,0.524257,Boardings represent only direct boardings to CRT.
3,v910-official__BY_2019,04-LAYTON STATION,walk,128.15,0.17,0.17,0.66,0.304063,Boardings represent only direct boardings to CRT.
4,v910-official__BY_2019,05-FARMINGTON STATION,walk,119.91,0.17,0.17,0.82,0.300855,Boardings represent only direct boardings to CRT.
...,...,...,...,...,...,...,...,...,...
715,E2.18.4__BY_2019,11-DRAPER STATION,All,1122.41,2.48,4.41,6.29,4.579222,Boardings represent only direct boardings to CRT.
716,E2.18.4__BY_2019,12-LEHI STATION,All,1267.44,1.97,3.40,6.29,3.953310,Boardings represent only direct boardings to CRT.
717,E2.18.4__BY_2019,13-AMERICAN FORK STATION,All,1930.26,2.12,3.58,6.24,3.925441,Boardings represent only direct boardings to CRT.
718,E2.18.4__BY_2019,14-OREM CENTRAL STATION,All,1143.69,1.99,3.52,5.26,3.631628,Boardings represent only direct boardings to CRT.


## Observed

In [6]:
df_obs_1 = df_obs_ungrouped.merge(df_obs_access_dist, on = 'id')

In [7]:
boardings = df_obs_1.loc[df_obs_1['onoff'] == 'on']

boardings_total = (boardings.groupby(['station','AccessMode'])
                     .apply(lambda x: pd.Series({
                        'Brd_PA': sum(x['unlinked_weight_adj']),
                        'Dist_15': weighted_quantile(x['AcDist'].values, 0.15, x['unlinked_weight_adj'].values,10000),
                        'Dist_Med': weighted_quantile(x['AcDist'].values, 0.5, x['unlinked_weight_adj'].values,10000),
                        'Dist_85': weighted_quantile(x['AcDist'].values, 0.85, x['unlinked_weight_adj'].values,10000),
                        'Dist_Avg': weighted_average(x['AcDist'].values, x['unlinked_weight_adj'].values)

                     }))
                     .reset_index()
                  )

# Total across all access modes
boardings_all = (boardings.groupby('station')
    .apply(lambda x: pd.Series({
        'Brd_PA': sum(x['unlinked_weight_adj']),
        'Dist_15': weighted_quantile(x['AcDist'].values, 0.15, x['unlinked_weight_adj'].values, 10000),
        'Dist_Med': weighted_quantile(x['AcDist'].values, 0.5, x['unlinked_weight_adj'].values, 10000),
        'Dist_85': weighted_quantile(x['AcDist'].values, 0.85, x['unlinked_weight_adj'].values, 10000),
        'Dist_Avg': weighted_average(x['AcDist'].values, x['unlinked_weight_adj'].values)
    }))
    .reset_index()
)
boardings_all['AccessMode'] = 'All'

# Combine grouped + all
boardings_total = pd.concat([boardings_total, boardings_all], ignore_index=True)

boardings_total['Source'] = 'OBS'
boardings_total['Notes'] = 'Boardings represent all boardings, not just direct to CRT.'
boardings_total = boardings_total[['Source','station','AccessMode','Brd_PA','Dist_15','Dist_Med','Dist_85','Dist_Avg', 'Notes']]
boardings_total

  boardings_total = (boardings.groupby(['station','AccessMode'])
  boardings_all = (boardings.groupby('station')


Unnamed: 0,Source,station,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,OBS,01-OGDEN STATION,drive,1359.419,2.099442,4.35,7.42,5.370319,"Boardings represent all boardings, not just di..."
1,OBS,01-OGDEN STATION,walk,495.237,0.2444,0.428609,1.160804,0.656307,"Boardings represent all boardings, not just di..."
2,OBS,02-ROY STATION,drive,669.648,1.232191,3.040313,4.24,3.393104,"Boardings represent all boardings, not just di..."
3,OBS,02-ROY STATION,walk,168.575,0.363893,0.641269,1.046372,0.789633,"Boardings represent all boardings, not just di..."
4,OBS,03-CLEARFIELD STATION,drive,739.268,1.19,3.04,5.2,3.241714,"Boardings represent all boardings, not just di..."
5,OBS,03-CLEARFIELD STATION,walk,232.219,0.270308,0.527209,1.000823,0.709644,"Boardings represent all boardings, not just di..."
6,OBS,04-LAYTON STATION,drive,1113.116,1.40245,2.35,3.9,2.940304,"Boardings represent all boardings, not just di..."
7,OBS,04-LAYTON STATION,walk,384.134,0.313129,1.026735,1.948161,1.111841,"Boardings represent all boardings, not just di..."
8,OBS,05-FARMINGTON STATION,drive,378.69,1.43,2.64,6.25,4.07251,"Boardings represent all boardings, not just di..."
9,OBS,05-FARMINGTON STATION,walk,120.055,0.150007,0.882145,2.067059,1.045497,"Boardings represent all boardings, not just di..."


## Comparison of TDM and OBS

In [8]:
sumStats = pd.concat([boardings_total,df_tdm_brd],ignore_index=True)

In [9]:
def plotit(variable, access_mode):
    output.clear_output()  # Clear previous output before displaying new content
    global firstTime
    if firstTime:
    
        filtered_data = sumStats[sumStats['AccessMode'] == access_mode]
            
        # Create histogram
        fig = px.histogram(
            filtered_data, 
            x="station", 
            y=variable, 
            text_auto='.2s',
            color='Source', 
            barmode='group',
            height=400
        )
        fig.update_layout(
            xaxis_title="Station Name",
            yaxis_title=str(variable),
            legend_title="Model Version"
        )
        
        # Display the plot
        fig.show()
    
    else:
        firstTime = True

In [10]:
lstValues = list([
    'Dist_15',
    'Dist_Med',
    'Dist_85',
    'Dist_Avg'
])
accessModeOptions = ['drive', 'walk']

selectValues = widgets.Select(options=lstValues, value=('Dist_Med'), description = 'Select Variable')
selectAccessMode = widgets.Dropdown(options=accessModeOptions, value='drive', description='Access Mode')

# Set up a global variable to track whether the widgets have been changed
firstTime = False

# create output widget to display filtered DataFrame
output = widgets.Output()
hbox = widgets.HBox([selectValues, selectAccessMode])

# create interactive widget
interactive_output = widgets.interactive_output(plotit, {'variable':selectValues, 'access_mode': selectAccessMode})

display(hbox)
display(interactive_output)
display(output)

HBox(children=(Select(description='Select Variable', index=1, options=('Dist_15', 'Dist_Med', 'Dist_85', 'Dist…

Output()

Output()

In [11]:
sumStats.rename(columns={'Source': 'model_id'}).to_csv("crt-improvements-app/data/access-distance-stats.csv", index=False)
print("access-distance-stats.csv")

access-distance-stats.csv


In [12]:
import pandas as pd
import numpy as np

# Read stations data
df_stations1 = gs.df_stations[['station', 'N', 'County']]


# Output container
df_tdm_brd_county = pd.DataFrame()

for tdm_filename, tdm_sourcename in zip(tdm_filenames, tdm_sourcenames):
    print(tdm_sourcename)
    df_tdm_links = pd.read_csv(tdm_filename, low_memory=False)

    # Merge boarding and alighting station info (with County)
    df_tdm_1 = (
        df_tdm_links
        .merge(df_stations1[['N', 'station', 'County']], left_on="B", right_on="N", how="left")
        .rename(columns={"station": "brd_station", "County": "brd_county"})
        .merge(df_stations1[['N', 'station', 'County']], left_on="A", right_on="N", how="left")
        .rename(columns={"station": "alt_station", "County": "alt_county"})
        .drop(columns=['N_x', 'N_y'])
        .copy()
    )

    # Filter to relevant CRT access trips
    df_tdm_2 = df_tdm_1[
        ((df_tdm_1["brd_station"].notna()) & (df_tdm_1["A"] < 10000)) |
        ((df_tdm_1["alt_station"].notna()) & (df_tdm_1["B"] < 10000))
    ].copy()

    df_tdm_2 = df_tdm_2[df_tdm_2["Mode"].isin([80, 11])].copy()
    df_tdm_2["FromSkim_CRT"] = pd.to_numeric(df_tdm_2["FromSkim_CRT"], errors="coerce")

    # Prepare relevant columns
    df_tdm_3 = df_tdm_2[
        ['Mode', 'Period', 'AccessMode', 'brd_station', 'brd_county', 'alt_station', 'Riders', 'Distance']
    ].copy()

    # Group by County + Mode + AccessMode
    df_county_mode = (
        df_tdm_3
        .dropna(subset=["brd_station", "brd_county"])
        .groupby(['brd_county', 'Mode', 'AccessMode'], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
        .reset_index()
    )

    # Group by County + Mode (All access modes combined)
    df_county_all = (
        df_tdm_3
        .dropna(subset=["brd_station", "brd_county"])
        .groupby(['brd_county', 'Mode'], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
        .reset_index()
    )
    df_county_all["AccessMode"] = "All"

    # Combine both AccessMode-specific and "All"
    df_summary = pd.concat([df_county_mode, df_county_all], ignore_index=True)

    # Add source/notes metadata
    df_summary["Source"] = tdm_sourcename
    df_summary["Notes"] = "County-level summary of CRT boardings."

    # Reorder columns
    df_summary = df_summary[
        ["Source", "brd_county", "Mode", "AccessMode", "Brd_PA", "Dist_15", "Dist_Med", "Dist_85", "Dist_Avg", "Notes"]
    ].rename(columns={"brd_county": "County"})

    # Append to result container
    df_tdm_brd_county = pd.concat([df_tdm_brd_county, df_summary], ignore_index=True)

# Final output
display(df_tdm_brd_county)


v910-official__BY_2019


  df_tdm_3
  df_tdm_3


v910-official__RTP_2050


  df_tdm_3
  df_tdm_3


E2.14.4__BY_2019


  df_tdm_3
  df_tdm_3


E2.14.4__RTP_2050


  df_tdm_3
  df_tdm_3


E2.16.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.16.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.17.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_2050


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_MedDistAdj_2050


  df_tdm_3
  df_tdm_3


E2.18.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.4__BY_2019


  df_tdm_3
  df_tdm_3


Unnamed: 0,Source,County,Mode,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,v910-official__BY_2019,Davis,11.0,walk,1034.93,0.17,0.29,0.64,0.412758,County-level summary of CRT boardings.
1,v910-official__BY_2019,Davis,80.0,drive,2959.38,1.44,2.70,4.34,2.929742,County-level summary of CRT boardings.
2,v910-official__BY_2019,Salt Lake,11.0,walk,1669.52,0.19,0.35,0.67,0.414255,County-level summary of CRT boardings.
3,v910-official__BY_2019,Salt Lake,80.0,drive,3478.83,2.42,4.34,7.52,4.856203,County-level summary of CRT boardings.
4,v910-official__BY_2019,Utah,11.0,walk,1188.87,0.08,0.43,0.58,0.378081,County-level summary of CRT boardings.
...,...,...,...,...,...,...,...,...,...,...
187,E2.18.4__BY_2019,Salt Lake,80.0,All,3026.15,2.42,4.02,5.97,4.152664,County-level summary of CRT boardings.
188,E2.18.4__BY_2019,Utah,11.0,All,1070.40,0.08,0.43,0.58,0.368766,County-level summary of CRT boardings.
189,E2.18.4__BY_2019,Utah,80.0,All,5291.83,1.90,3.47,6.29,3.919640,County-level summary of CRT boardings.
190,E2.18.4__BY_2019,Weber,11.0,All,414.59,0.37,0.45,0.68,0.512463,County-level summary of CRT boardings.


In [13]:
_df = df_tdm_brd_county

_df = _df[(_df['AccessMode'] == 'drive') & (_df['Mode'] == 80)]

_df[['Source','County','AccessMode','Dist_Med']].pivot_table(index='County', columns='Source', values='Dist_Med')

Source,E2.14.4__BY_2019,E2.14.4__RTP_2050,E2.16.2__BY_2019,E2.16.3__BY_2019,E2.17.3__BY_2019,E2.18.2__BY_2019,E2.18.2__RTP_2050,E2.18.2__RTP_MedDistAdj_2050,E2.18.3__BY_2019,E2.18.4__BY_2019,v910-official__BY_2019,v910-official__RTP_2050
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Davis,2.61,2.59,2.59,2.59,2.56,2.55,2.59,2.59,2.55,2.56,2.7,2.59
Salt Lake,4.3,3.93,4.55,3.99,4.02,4.02,4.1,4.1,4.02,4.02,4.34,3.85
Utah,4.46,4.7,3.52,3.52,3.52,3.45,3.21,3.97,3.46,3.47,4.69,4.74
Weber,3.77,4.51,3.7,3.62,3.62,3.62,4.66,4.66,3.62,3.62,3.86,4.51


In [14]:
# Add County first
boardings = boardings.merge(
    df_stations1[['station', 'County']],
    on='station',
    how='left'
)

# By County and access mode
boardings_total = (
    boardings.groupby(['County', 'AccessMode'])
    .apply(lambda x: pd.Series({
        'Brd_PA': sum(x['unlinked_weight_adj']),
        'Dist_15': weighted_quantile(x['AcDist'].values, 0.15, x['unlinked_weight_adj'].values, 10000),
        'Dist_Med': weighted_quantile(x['AcDist'].values, 0.5, x['unlinked_weight_adj'].values, 10000),
        'Dist_85': weighted_quantile(x['AcDist'].values, 0.85, x['unlinked_weight_adj'].values, 10000),
        'Dist_Avg': weighted_average(x['AcDist'].values, x['unlinked_weight_adj'].values)
    }))
    .reset_index()
)

# By County (all access modes)
boardings_all = (
    boardings.groupby('County')
    .apply(lambda x: pd.Series({
        'Brd_PA': sum(x['unlinked_weight_adj']),
        'Dist_15': weighted_quantile(x['AcDist'].values, 0.15, x['unlinked_weight_adj'].values, 10000),
        'Dist_Med': weighted_quantile(x['AcDist'].values, 0.5, x['unlinked_weight_adj'].values, 10000),
        'Dist_85': weighted_quantile(x['AcDist'].values, 0.85, x['unlinked_weight_adj'].values, 10000),
        'Dist_Avg': weighted_average(x['AcDist'].values, x['unlinked_weight_adj'].values)
    }))
    .reset_index()
)
boardings_all['AccessMode'] = 'All'

# Combine grouped + all
boardings_total = pd.concat([boardings_total, boardings_all], ignore_index=True)

# Add final metadata
boardings_total['Source'] = 'OBS'
boardings_total['Notes'] = 'Boardings represent all boardings, not just direct to CRT.'
boardings_total = boardings_total[
    ['Source', 'County', 'AccessMode', 'Brd_PA', 'Dist_15', 'Dist_Med', 'Dist_85', 'Dist_Avg', 'Notes']
]


  boardings.groupby(['County', 'AccessMode'])
  boardings.groupby('County')


In [15]:
_df = boardings_total

_df = _df[(_df['AccessMode'] == 'drive')]

_df[['Source','County','AccessMode','Dist_Med']]

Unnamed: 0,Source,County,AccessMode,Dist_Med
0,OBS,Davis,drive,2.64
2,OBS,Salt Lake,drive,4.18
4,OBS,Utah,drive,3.25
6,OBS,Weber,drive,3.69


In [16]:
# Add County first
boardings = boardings.merge(
    df_stations1[['station', 'County']],
    on='station',
    how='left'
)

# By County and access mode
boardings_region_total = (
    boardings.groupby(['AccessMode'])
    .apply(lambda x: pd.Series({
        'Brd_PA': sum(x['unlinked_weight_adj']),
        'Dist_15': weighted_quantile(x['AcDist'].values, 0.15, x['unlinked_weight_adj'].values, 10000),
        'Dist_Med': weighted_quantile(x['AcDist'].values, 0.5, x['unlinked_weight_adj'].values, 10000),
        'Dist_85': weighted_quantile(x['AcDist'].values, 0.85, x['unlinked_weight_adj'].values, 10000),
        'Dist_Avg': weighted_average(x['AcDist'].values, x['unlinked_weight_adj'].values)
    }))
    .reset_index()
)


# Combine grouped + all
boardings_region_total = pd.concat([boardings_region_total], ignore_index=True)

# Add final metadata
boardings_region_total['Source'] = 'OBS'
boardings_region_total['Notes'] = 'Boardings represent all boardings, not just direct to CRT.'
boardings_region_total = boardings_region_total[
    ['Source', 'AccessMode', 'Brd_PA', 'Dist_15', 'Dist_Med', 'Dist_85', 'Dist_Avg', 'Notes']
]


  boardings.groupby(['AccessMode'])


In [17]:
_df = boardings_region_total

_df = _df[(_df['AccessMode'] == 'drive')]

_df[['Source','AccessMode','Dist_Med']]

Unnamed: 0,Source,AccessMode,Dist_Med
0,OBS,drive,3.341861


In [18]:
import pandas as pd
import numpy as np

# Read stations data
df_stations1 = gs.df_stations[['station', 'N', 'County']]


# Output container
df_tdm_brd_region = pd.DataFrame()

for tdm_filename, tdm_sourcename in zip(tdm_filenames, tdm_sourcenames):
    print(tdm_sourcename)
    df_tdm_links = pd.read_csv(tdm_filename, low_memory=False)

    # Merge boarding and alighting station info (with County)
    df_tdm_1 = (
        df_tdm_links
        .merge(df_stations1[['N', 'station', 'County']], left_on="B", right_on="N", how="left")
        .rename(columns={"station": "brd_station", "County": "brd_county"})
        .merge(df_stations1[['N', 'station', 'County']], left_on="A", right_on="N", how="left")
        .rename(columns={"station": "alt_station", "County": "alt_county"})
        .drop(columns=['N_x', 'N_y'])
        .copy()
    )

    # Filter to relevant CRT access trips
    df_tdm_2 = df_tdm_1[
        ((df_tdm_1["brd_station"].notna()) & (df_tdm_1["A"] < 10000)) |
        ((df_tdm_1["alt_station"].notna()) & (df_tdm_1["B"] < 10000))
    ].copy()

    df_tdm_2 = df_tdm_2[df_tdm_2["Mode"].isin([80, 11])].copy()
    df_tdm_2["FromSkim_CRT"] = pd.to_numeric(df_tdm_2["FromSkim_CRT"], errors="coerce")

    # Prepare relevant columns
    df_tdm_3 = df_tdm_2[
        ['Mode', 'Period', 'AccessMode', 'brd_station', 'brd_county', 'alt_station', 'Riders', 'Distance']
    ].copy()

    # Group by County + Mode + AccessMode
    df_county_mode = (
        df_tdm_3
        .dropna(subset=["brd_station"])
        .groupby(['Mode', 'AccessMode'], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
        .reset_index()
    )

    # Group by County + Mode (All access modes combined)
    df_county_all = (
        df_tdm_3
        .dropna(subset=["brd_station"])
        .groupby(['Mode'], as_index=False)
        .apply(lambda x: pd.Series({
            'Brd_PA': x['Riders'].sum(),
            'Dist_15': weighted_quantile(x['Distance'].values, 0.15, x['Riders'].values, 10000),
            'Dist_Med': weighted_quantile(x['Distance'].values, 0.5, x['Riders'].values, 10000),
            'Dist_85': weighted_quantile(x['Distance'].values, 0.85, x['Riders'].values, 10000),
            'Dist_Avg': weighted_average(x['Distance'].values, x['Riders'].values)
        }))
        .reset_index()
    )
    df_county_all["AccessMode"] = "All"

    # Combine both AccessMode-specific and "All"
    df_summary = pd.concat([df_county_mode, df_county_all], ignore_index=True)

    # Add source/notes metadata
    df_summary["Source"] = tdm_sourcename
    df_summary["Notes"] = "County-level summary of CRT boardings."

    # Reorder columns
    df_summary = df_summary[
        ["Source", "Mode", "AccessMode", "Brd_PA", "Dist_15", "Dist_Med", "Dist_85", "Dist_Avg", "Notes"]
    ]

    # Append to result container
    df_tdm_brd_region = pd.concat([df_tdm_brd_region, df_summary], ignore_index=True)

# Final output
display(df_tdm_brd_region)


v910-official__BY_2019


  df_tdm_3
  df_tdm_3


v910-official__RTP_2050


  df_tdm_3
  df_tdm_3


E2.14.4__BY_2019


  df_tdm_3
  df_tdm_3


E2.14.4__RTP_2050


  df_tdm_3
  df_tdm_3


E2.16.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.16.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.17.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_2050


  df_tdm_3
  df_tdm_3


E2.18.2__RTP_MedDistAdj_2050


  df_tdm_3
  df_tdm_3


E2.18.3__BY_2019


  df_tdm_3
  df_tdm_3


E2.18.4__BY_2019


  df_tdm_3
  df_tdm_3


Unnamed: 0,Source,Mode,AccessMode,Brd_PA,Dist_15,Dist_Med,Dist_85,Dist_Avg,Notes
0,v910-official__BY_2019,11.0,walk,4334.59,0.17,0.4,0.66,0.414053,County-level summary of CRT boardings.
1,v910-official__BY_2019,80.0,drive,12194.51,1.9,3.77,8.14,5.057928,County-level summary of CRT boardings.
2,v910-official__BY_2019,11.0,All,4334.59,0.17,0.4,0.66,0.414053,County-level summary of CRT boardings.
3,v910-official__BY_2019,80.0,All,12194.51,1.9,3.77,8.14,5.057928,County-level summary of CRT boardings.
4,v910-official__RTP_2050,11.0,walk,18472.23,0.17,0.29,0.64,0.364819,County-level summary of CRT boardings.
5,v910-official__RTP_2050,80.0,drive,43880.87,1.79,3.69,8.01,4.823302,County-level summary of CRT boardings.
6,v910-official__RTP_2050,11.0,All,18472.23,0.17,0.29,0.64,0.364819,County-level summary of CRT boardings.
7,v910-official__RTP_2050,80.0,All,43880.87,1.79,3.69,8.01,4.823302,County-level summary of CRT boardings.
8,E2.14.4__BY_2019,11.0,walk,4359.05,0.17,0.4,0.66,0.414664,County-level summary of CRT boardings.
9,E2.14.4__BY_2019,80.0,drive,13412.59,1.85,3.53,7.35,4.554548,County-level summary of CRT boardings.


In [19]:
_df = df_tdm_brd_region

_df = _df[(_df['AccessMode'] == 'drive') & (_df['Mode'] == 80)]

_df[['Source','AccessMode','Dist_Med']]

Unnamed: 0,Source,AccessMode,Dist_Med
1,v910-official__BY_2019,drive,3.77
5,v910-official__RTP_2050,drive,3.69
9,E2.14.4__BY_2019,drive,3.53
13,E2.14.4__RTP_2050,drive,3.8
17,E2.16.2__BY_2019,drive,3.48
21,E2.16.3__BY_2019,drive,3.32
25,E2.17.3__BY_2019,drive,3.33
29,E2.18.2__BY_2019,drive,3.32
33,E2.18.2__RTP_2050,drive,3.44
37,E2.18.2__RTP_MedDistAdj_2050,drive,3.63
