In [1]:
import pandas as pd
import openmatrix as omx
import numpy as np
import os

# Station Info

In [2]:
# Load the station lookup and keep only the commuter-rail (CRT) rows
station_obs_names_df = pd.read_csv(r"E:\GitHub\Resources\1-TDM\Station-Lookups\station-obs-names.csv")
crt_station_obs_names_df = station_obs_names_df[station_obs_names_df['mode']=='CRT']
# One row per distinct (LAT, LON): keep the first STATION name and the largest TAZID seen there
crt_station_obs_names_df = crt_station_obs_names_df.groupby(['LAT','LON'],as_index=False).agg(STATION=('STATION','first'),TAZID=('TAZID','max'))

# Sort by LAT and then LON, both descending, so row 1 is the northernmost station
unique_stations_df = crt_station_obs_names_df.drop_duplicates(subset=['STATION']).sort_values(
    by=['LAT', 'LON'],
    ascending=[False, False]
).reset_index(drop=True)

# Assign row numbers only to unique STATIONs
unique_stations_df['RowNumber'] = range(1, len(unique_stations_df) + 1)

# Merge back to original DataFrame to retain all rows
# (a station with several LAT/LON records gets the same RowNumber on each of them)
crt_station_obs_names_df = crt_station_obs_names_df.merge(
    unique_stations_df[['STATION', 'RowNumber']],
    on='STATION',
    how='left'
)

# STATION_ID is the zero-padded row number plus the station name, e.g. "09-Murray Central"
crt_station_obs_names_df['STATION_ID'] = crt_station_obs_names_df['RowNumber'].apply(lambda x: f"{x:02d}") + '-' + crt_station_obs_names_df['STATION']

# DISABLED -- the walk-access skim is used instead of these manual overrides.
## CLOSE STATION FIXES
## CHANGE ALL NORTH TEMPLE STATION TAZs to 933, since trips from the TAZ the station is in would actually drive to SL Central
## CHANGE ALL SALT LAKE CENTRAL STATION TAZs to 1023, since the station is actually in that TAZ
#crt_station_obs_names_df.loc[crt_station_obs_names_df['STATION']=='North Temple', 'TAZID'] = 933
#crt_station_obs_names_df.loc[crt_station_obs_names_df['STATION']=='Salt Lake Central', 'TAZID'] = 1023

crt_station_obs_names_df

Unnamed: 0,LAT,LON,STATION,TAZID,RowNumber,STATION_ID
0,40.22544,-111.660632,Provo Central,3001.0,15,15-Provo Central
1,40.28014,-111.725489,Orem Central,2882.0,14,14-Orem Central
2,40.374774,-111.820649,American Fork,2728.0,13,13-American Fork
3,40.425196,-111.896354,Lehi,2605.0,12,12-Lehi
4,40.515484,-111.904407,Draper,2117.0,11,11-Draper
5,40.563155,-111.900753,South Jordan,1973.0,10,10-South Jordan
6,40.659631,-111.895661,Murray Central,1633.0,9,09-Murray Central
7,40.659758,-111.896432,Murray Central,1633.0,9,09-Murray Central
8,40.660269,-111.895282,Murray Central,1633.0,9,09-Murray Central
9,40.761721,-111.908301,Salt Lake Central,1024.0,8,08-Salt Lake Central


In [3]:
# Short display labels, keyed by the full "NN-Station Name" identifier
station_name_mapping = {
    "15-Provo Central": "15-Prov",
    "14-Orem Central": "14-Orem",
    "13-American Fork": "13-AF",
    "12-Lehi": "12-Lehi",
    "11-Draper": "11-Drap",
    "10-South Jordan": "10-SJ",
    "09-Murray Central": "09-Murr",
    "08-Salt Lake Central": "08-SLCn",
    "07-North Temple": "07-NTmp",
    "06-Woods Cross": "06-WC",
    "05-Farmington": "05-Frmg",
    "04-Layton": "04-Layt",
    "03-Clearfield": "03-Clrf",
    "02-Roy": "02-Roy",
    "01-Ogden": "01-Ogdn",
}

# Swap each full identifier for its short label; values without a mapping are left as they are
crt_station_obs_names_df['STATION_ID'] = crt_station_obs_names_df['STATION_ID'].replace(
    to_replace=station_name_mapping
)


In [4]:
# Pair every station record with every station record (self cross join)
crossed_df = pd.merge(
    crt_station_obs_names_df,
    crt_station_obs_names_df,
    how='cross',
    suffixes=('_1', '_2'),
)

# Keep each unordered pair once: the first label must sort before the second
is_ordered_pair = crossed_df['STATION_ID_1'] < crossed_df['STATION_ID_2']
crt_station_obs_names_crossed_df = crossed_df.loc[
    is_ordered_pair,
    ['STATION_ID_1', 'STATION_ID_2', 'TAZID_1', 'TAZID_2'],
]

crt_station_obs_names_crossed_df

Unnamed: 0,STATION_ID_1,STATION_ID_2,TAZID_1,TAZID_2
19,14-Orem,15-Prov,2882.0,3001.0
38,13-AF,15-Prov,2728.0,3001.0
39,13-AF,14-Orem,2728.0,2882.0
57,12-Lehi,15-Prov,2605.0,3001.0
58,12-Lehi,14-Orem,2605.0,2882.0
...,...,...,...,...
355,01-Ogdn,06-WC,368.0,863.0
356,01-Ogdn,05-Frmg,368.0,780.0
357,01-Ogdn,04-Layt,368.0,737.0
358,01-Ogdn,03-Clrf,368.0,663.0


# TDM Prep

In [5]:
# Scenario label -> scenario folder to read model outputs from.
# Commented-out entries are scenarios excluded from this comparison.
model_paths = {
#   "E2.14.1-TDM-Recalib"                            : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.14.1/BY_2019",
#   "E2.14.2-TDM-Recalib-100-divisor-removed"        : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.14.2/BY_2019",
#   "E2.14.3-TDM-Recalib-second-part-ivt-removed"    : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.14.3/BY_2019",
    "1-TDM-Recalib"                                  : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.14.4/BY_2019",
    "2-IVT-Test"                                     : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.7.3/BY_2019",
    "3-IVT-Test-2"                                   : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.7.4/BY_2019",
#   "E2.13.2-Single-TLF-RunFac-2.5"                  : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.2/BY_2019",
#   "E2.13.3-Purpose-TLF-RunFac-2.5"                 : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.3/BY_2019",
#   "E2.13.4-PurpPrd-TLF-RunFac-2.5"                 : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.4/BY_2019",
#   "E2.13.4.2-PurpPrd-TLF-RunFac-2.5-NonZero-Calib" : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.4.2/BY_2019",
    "4-ASC-Bin-By-Distance"                          : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.4.3/BY_2019",
#   "E2.13.5-PurpPrd-TLF-RunFac-2.5-wIvtTest"        : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.5/BY_2019",
    "5-ASC-Bin-By-Distance-wIvtTest"                 : "E:/GitHub/WF-TDM-v9x/Scenarios/v920-E2.13.5.2/BY_2019"
}

In [6]:
# Transit skim files; list positions are referenced by skimloc_mapping below
transit_skim_loc = [
    r"1a_Skims\skm_d8_Pk.omx",
    r"1a_Skims\skm_d8_Ok.omx",
    r"1a_Skims\skm_w8_Pk.omx",
    r"1a_Skims\skm_w8_Ok.omx",
]

# TAZ ids 3547 through 3600 (inclusive) are treated as invalid
invalid_model_taz_list = np.arange(3547, 3600 + 1)

# Dimensions iterated when reading model outputs
purposes = ['HBW', 'HBO', 'NHB', 'HBC']
periods = ['PK', 'OK']
accesses = ['dCRT', 'wCRT']

# (period, access mode) -> position of the matching file in transit_skim_loc
skimloc_mapping = dict([
    (('PK', 'dCRT'), 0),
    (('PK', 'wCRT'), 2),
    (('OK', 'dCRT'), 1),
    (('OK', 'wCRT'), 3),
])

# Model access-mode code -> access label
access_mapping = dict(dCRT='Drive', wCRT='Walk')

# Function to convert model data into DataFrame
def create_model_mtx_to_df(trips_file_name, crdist_file_name, trips_mtx_name, crdist_mtx_name='D8', delZero=True):
    """Flatten a trip matrix and a distance skim into one long table.

    Parameters
    ----------
    trips_file_name : path of the OMX file holding the trip matrix.
    crdist_file_name : path of the OMX file holding the distance skim.
    trips_mtx_name : name of the matrix to read from the trips file.
    crdist_mtx_name : name of the matrix to read from the skim file (default 'D8').
    delZero : when True, drop rows whose trips_count is not greater than zero.

    Returns
    -------
    DataFrame with columns p_TAZID, a_TAZID, trips_count and cr_travel_distance,
    one row per matrix cell, with 1-based TAZ ids. Rows whose p_TAZID or a_TAZID
    is in the module-level ``invalid_model_taz_list`` are removed.
    """
    # Read each matrix fully into memory, then release the file handle right away:
    # this function is called many times per run and handles were previously leaked.
    trips_file = omx.open_file(trips_file_name)
    try:
        trips_mtx = np.array(trips_file[trips_mtx_name])
    finally:
        trips_file.close()

    crdist_file = omx.open_file(crdist_file_name)
    try:
        crdist_mtx = np.array(crdist_file[crdist_mtx_name])
    finally:
        crdist_file.close()

    # stack() turns each (row, column) cell into one record indexed by (row, column)
    trips_df = pd.DataFrame(pd.DataFrame(trips_mtx).stack()).rename({0: 'trips_count'}, axis=1)
    crdist_df = pd.DataFrame(pd.DataFrame(crdist_mtx).stack()).rename({0: 'cr_travel_distance'}, axis=1)

    model_df = pd.concat([trips_df, crdist_df], axis=1).reset_index().rename(
        {'level_0': 'p_TAZID', 'level_1': 'a_TAZID'}, axis=1)

    # Matrix positions are 0-based; shift to 1-based TAZ ids
    model_df['p_TAZID'] += 1
    model_df['a_TAZID'] += 1
    model_df = model_df[~model_df['p_TAZID'].isin(invalid_model_taz_list) & ~model_df['a_TAZID'].isin(invalid_model_taz_list)]

    # **Filter out rows where trips_count == 0**
    if delZero:
        model_df = model_df[model_df['trips_count'] > 0]

    return model_df

# Dictionary to store results from all models, keyed "<purpose>_<access>_<period>_<version>"
model_results = {}

# Iterate through model versions
for version, Model_path in model_paths.items():
    print("")
    print(f"Processing Model Version: {version}")
    model_data_loc2 = os.path.join(Model_path, "4_ModeChoice")

    for purpose in purposes:
        print(f"...{purpose}", end="")
        for period in periods:
            print(f" {period}", end="")
            # No HBC / OK combination is read
            if purpose == 'HBC' and period == 'OK':
                print(" skip", end="")
                continue
            for access in accesses:
                print(f" {access}", end="")

                # Direct lookup: an unmapped (period, access) pair fails with a clear KeyError
                # instead of a None being used as a list index
                skim_file = transit_skim_loc[skimloc_mapping[(period, access)]]

                model_df = create_model_mtx_to_df(
                    trips_file_name=os.path.join(model_data_loc2, '2_DetailedTripMatrices', f"{purpose}_trips_allsegs_{period}.omx"),
                    crdist_file_name=os.path.join(model_data_loc2, skim_file),
                    trips_mtx_name=access,
                    delZero=True
                )

                # Keep only cells that have a commuter-rail distance, then scale the
                # stored trip values down by 100. assign() builds a new frame, so the
                # filtered slice is never written to (no SettingWithCopyWarning).
                model_df = model_df[model_df['cr_travel_distance'] > 0]
                model_df = model_df.assign(trips_count=model_df['trips_count'] / 100)

                key = f"{purpose}_{access}_{period}_{version}"
                model_results[key] = model_df

print("")
print("Done!")


Processing Model Version: 1-TDM-Recalib
...HBW PK dCRT wCRT OK dCRT wCRT...HBO PK dCRT wCRT OK dCRT wCRT...NHB PK dCRT wCRT OK dCRT wCRT...HBC PK dCRT wCRT OK skip
Processing Model Version: 2-IVT-Test
...HBW PK dCRT wCRT OK dCRT wCRT...HBO PK dCRT wCRT OK dCRT wCRT...NHB PK dCRT wCRT OK dCRT wCRT...HBC PK dCRT wCRT OK skip
Processing Model Version: 3-IVT-Test-2
...HBW PK dCRT wCRT OK dCRT wCRT...HBO PK dCRT wCRT OK dCRT wCRT...NHB PK dCRT wCRT OK dCRT wCRT...HBC PK dCRT wCRT OK skip
Processing Model Version: 4-ASC-Bin-By-Distance
...HBW PK dCRT wCRT OK dCRT wCRT...HBO PK dCRT wCRT OK dCRT wCRT...NHB PK dCRT wCRT OK dCRT wCRT...HBC PK dCRT wCRT OK skip
Processing Model Version: 5-ASC-Bin-By-Distance-wIvtTest
...HBW PK dCRT wCRT OK dCRT wCRT...HBO PK dCRT wCRT OK dCRT wCRT...NHB PK dCRT wCRT OK dCRT wCRT...HBC PK dCRT wCRT OK skip
Done!


In [7]:
# Stack all per-run frames; the dict key lands in 'level_0', the old row label in 'level_1'
model_results_df = pd.concat(model_results).reset_index().drop(columns=['level_1'])

# Pull the key column out of the frame and break it on the first three underscores,
# so anything after the third underscore stays together as the model name
split_cols = model_results_df.pop('level_0').str.split('_', n=3, expand=True)

# Attach the four key parts as named columns
for part_position, part_name in enumerate(['purpose', 'mode', 'period', 'model']):
    model_results_df[part_name] = split_cols[part_position]

model_results_df

Unnamed: 0,p_TAZID,a_TAZID,trips_count,cr_travel_distance,purpose,mode,period,model
0,3,634,0.0001,11.24,HBW,dCRT,PK,1-TDM-Recalib
1,3,659,0.0001,11.24,HBW,dCRT,PK,1-TDM-Recalib
2,3,663,0.0002,11.24,HBW,dCRT,PK,1-TDM-Recalib
3,3,720,0.0002,14.92,HBW,dCRT,PK,1-TDM-Recalib
4,3,780,0.0005,20.80,HBW,dCRT,PK,1-TDM-Recalib
...,...,...,...,...,...,...,...,...
23092660,3180,2848,0.0021,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest
23092661,3195,2848,0.0330,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest
23092662,3210,2848,0.1438,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest
23092663,3211,2848,0.1011,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest


In [8]:
# Get station-to-station crt distance
# using TAZ of stations as proxy for station-to-station distance that can then be joined with crt distance matrix to parse out station-to-station pairs

crdist_file = omx.open_file(r"E:\GitHub\WF-TDM-v9x\Scenarios\v920-E2.13.5.2\BY_2019\4_ModeChoice\1a_Skims\skm_w8_Pk.omx")
try:
    # Copy the matrix into memory so the file handle can be released immediately
    crdist_mtx = np.array(crdist_file['D8'])
finally:
    crdist_file.close()

# One record per matrix cell: I = row position, J = column position
crdist_df = (
    pd.DataFrame(crdist_mtx)
    .stack()
    .rename('cr_travel_distance')
    .reset_index()
    .rename(columns={'level_0': 'I', 'level_1': 'J'})
)

# index start of omx is 0, voyager is 1
crdist_df['I'] += 1
crdist_df['J'] += 1

# Keep only cells that have a distance greater than zero
crdist_df = crdist_df[crdist_df['cr_travel_distance']>0]
crdist_df

Unnamed: 0,I,J,cr_travel_distance
269042,75,497,4.32
269045,75,500,4.32
269047,75,502,4.32
269209,75,664,11.24
269282,75,737,14.92
...,...,...,...
12566475,3463,2878,5.24
12566476,3463,2879,5.24
12566478,3463,2881,5.24
12566479,3463,2882,5.24


In [9]:
# Label each distinct crt distance with the station pair(s) that produce it.
# The TAZ of each station is used as a proxy for the station: distance cells whose
# (I, J) equal a station pair's (TAZID_1, TAZID_2) give that pair's distance.
# (A stray `model_results[key]` lookup that depended on a loop variable left over
# from an earlier cell has been removed; it had no effect on the result.)

# use walk, since drive to crt gives interesting results between SLC Central and North Temple
combined_for_station_pairs_df = pd.merge(crdist_df, crt_station_obs_names_crossed_df, left_on=['I','J'], right_on=['TAZID_1','TAZID_2'])
station_pair_crt_distances_df = combined_for_station_pairs_df[['cr_travel_distance','STATION_ID_1','STATION_ID_2']].drop_duplicates()

# manually add -- manually created matrix from station to next station distances to find closest
_df_temp = pd.DataFrame([
    [0.78, '07-NTmp', '08-SLCn' ],
    # THESE ARE STRANGE, IT DUPLICATES DISTANCE BETWEEN SOME STATION PAIRS!!!
    [8.64, '01-Ogdn', '02-Roy'], # 4.32 x 2
    [14.32, '08-SLCn', '09-Murr'], # 7.16 x 2
    [6.48, '10-SJ', '11-Drap'] # 3.24 x 2
], columns=['cr_travel_distance', 'STATION_ID_1', 'STATION_ID_2'])
station_pair_crt_distances_df = pd.concat([station_pair_crt_distances_df, _df_temp])

#
## IMPORTANT NOTE: Clearfield to SL Central and Woods Cross to Draper have the same distance!
## SO JUST COMBINE THEM FOR EASE
#
# Group by cr_travel_distance and concatenate STATION_ID_1 and STATION_ID_2,
# which leaves exactly one row per distinct distance
station_pair_crt_distances_df = station_pair_crt_distances_df.groupby("cr_travel_distance").agg({
    "STATION_ID_1": lambda x: ", ".join(map(str, x)),
    "STATION_ID_2": lambda x: ", ".join(map(str, x))
}).reset_index()

# Display only: the sorted view is not assigned back
station_pair_crt_distances_df.sort_values(
    by=["STATION_ID_1", "STATION_ID_2"], ascending=[True, True]
)


Unnamed: 0,cr_travel_distance,STATION_ID_1,STATION_ID_2
3,4.32,01-Ogdn,02-Roy
16,8.64,01-Ogdn,02-Roy
22,11.24,01-Ogdn,03-Clrf
30,14.92,01-Ogdn,04-Layt
40,20.80,01-Ogdn,05-Frmg
...,...,...,...
27,14.12,12-Lehi,14-Orem
39,19.36,12-Lehi,15-Prov
15,8.44,13-AF,14-Orem
25,13.68,13-AF,15-Prov


In [10]:
# Label every model record with the station pair that shares its crt distance.
# Left join: records whose distance has no station pair keep NaN labels.
model_results_with_station_pairs_df = model_results_df.merge(
    station_pair_crt_distances_df,
    how='left',
    on='cr_travel_distance',
)
model_results_with_station_pairs_df

Unnamed: 0,p_TAZID,a_TAZID,trips_count,cr_travel_distance,purpose,mode,period,model,STATION_ID_1,STATION_ID_2
0,3,634,0.0001,11.24,HBW,dCRT,PK,1-TDM-Recalib,01-Ogdn,03-Clrf
1,3,659,0.0001,11.24,HBW,dCRT,PK,1-TDM-Recalib,01-Ogdn,03-Clrf
2,3,663,0.0002,11.24,HBW,dCRT,PK,1-TDM-Recalib,01-Ogdn,03-Clrf
3,3,720,0.0002,14.92,HBW,dCRT,PK,1-TDM-Recalib,01-Ogdn,04-Layt
4,3,780,0.0005,20.80,HBW,dCRT,PK,1-TDM-Recalib,01-Ogdn,05-Frmg
...,...,...,...,...,...,...,...,...,...,...
23092660,3180,2848,0.0021,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov
23092661,3195,2848,0.0330,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov
23092662,3210,2848,0.1438,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov
23092663,3211,2848,0.1011,5.24,HBC,wCRT,PK,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov


**MAKE SURE ALL RECORDS HAVE STATIONS ASSIGNED**

In [11]:
# Distances of any record with a missing value in some column; an empty array means every record got a station pair
has_missing_value = model_results_with_station_pairs_df.isnull().any(axis=1)
model_results_with_station_pairs_df.loc[has_missing_value, 'cr_travel_distance'].dropna().unique()

array([], dtype=float64)

In [12]:
# Total trips per model run
(
    model_results_with_station_pairs_df
    .groupby('model', as_index=False)['trips_count']
    .sum()
    .rename(columns={'trips_count': 'trips_total'})
)

Unnamed: 0,model,trips_total
0,1-TDM-Recalib,19851.6567
1,2-IVT-Test,19916.8142
2,3-IVT-Test-2,19865.664
3,4-ASC-Bin-By-Distance,19864.9922
4,5-ASC-Bin-By-Distance-wIvtTest,19949.0246


In [13]:
# Trips on records that did not receive a first station label (expected to be zero)
lacks_station = model_results_with_station_pairs_df['STATION_ID_1'].isna()
model_results_with_station_pairs_df.loc[lacks_station, 'trips_count'].sum()

0.0

In [14]:
# The station-pair join must neither add nor drop records

# Row counts after and before the join
count_1 = len(model_results_with_station_pairs_df)
count_2 = len(model_results_df)

# Report the mismatch first, otherwise confirm the counts agree
if count_1 != count_2:
    print(f"❌ Mismatch: model_results_with_station_pairs_df has {count_1} records, "
          f"while model_results_df has {count_2} records.")
else:
    print(f"✅ Both DataFrames have the same number of records: {count_1}")


✅ Both DataFrames have the same number of records: 23092665


In [15]:
# Total trips per model run, access mode, purpose and period
(
    model_results_with_station_pairs_df
    .groupby(['model', 'mode', 'purpose', 'period'], as_index=False)['trips_count']
    .sum()
    .rename(columns={'trips_count': 'trips_total'})
)

Unnamed: 0,model,mode,purpose,period,trips_total
0,1-TDM-Recalib,dCRT,HBC,PK,3005.1102
1,1-TDM-Recalib,dCRT,HBO,OK,1081.0926
2,1-TDM-Recalib,dCRT,HBO,PK,1159.3239
3,1-TDM-Recalib,dCRT,HBW,OK,1649.6114
4,1-TDM-Recalib,dCRT,HBW,PK,6120.1314
...,...,...,...,...,...
65,5-ASC-Bin-By-Distance-wIvtTest,wCRT,HBO,PK,655.1863
66,5-ASC-Bin-By-Distance-wIvtTest,wCRT,HBW,OK,741.2909
67,5-ASC-Bin-By-Distance-wIvtTest,wCRT,HBW,PK,1943.4715
68,5-ASC-Bin-By-Distance-wIvtTest,wCRT,NHB,OK,541.3396


In [16]:
# Collapse TAZ-level records to one row per model, station pair, purpose, period and access mode
model_results_CRT_df = (
    model_results_with_station_pairs_df
    .groupby(['model', 'STATION_ID_1', 'STATION_ID_2', 'purpose', 'period', 'mode'], as_index=False)['trips_count']
    .sum()
    .rename(columns={'trips_count': 'trips_total'})
)
model_results_CRT_df

Unnamed: 0,model,STATION_ID_1,STATION_ID_2,purpose,period,mode,trips_total
0,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,dCRT,9.6189
1,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,wCRT,6.6417
2,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,dCRT,3.3084
3,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,wCRT,47.7483
4,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,PK,dCRT,3.0872
...,...,...,...,...,...,...,...
6877,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov,HBW,PK,wCRT,33.6010
6878,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov,NHB,OK,dCRT,0.2105
6879,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov,NHB,OK,wCRT,12.4442
6880,5-ASC-Bin-By-Distance-wIvtTest,14-Orem,15-Prov,NHB,PK,dCRT,0.1755


In [17]:
# Per-model totals of the station-pair table
model_results_CRT_df.groupby('model', as_index=False)['trips_total'].sum()

Unnamed: 0,model,trips_total
0,1-TDM-Recalib,19851.6567
1,2-IVT-Test,19916.8142
2,3-IVT-Test-2,19865.664
3,4-ASC-Bin-By-Distance,19864.9922
4,5-ASC-Bin-By-Distance-wIvtTest,19949.0246


# OBS Prep

In [18]:
# Load the processed on-board survey
# NOTE(review): machine-specific absolute path; consider moving it to a shared config cell
obs_df = pd.read_csv(r"C:\Users\bhereth\Documents\2019 Final Weighted UTA OD Data - 2022-04-05 - processed.csv", low_memory=False)

# Keep commuter-rail linked trips. The explicit copy makes obs_crt_df an independent
# frame, so the assignment below writes to it unambiguously instead of to a slice
# of obs_df (which raises SettingWithCopyWarning).
obs_crt_df = obs_df[(obs_df['Linked_Mode_txt']=='CRT')].copy()

# all HBC are peak period
obs_crt_df.loc[obs_crt_df['Purp5_text'] == 'HBC', 'PK_OK'] = 'PK'

# remove HBSch
obs_crt_df = obs_crt_df[obs_crt_df['Purp5_text']!='HBSch']

obs_crt_df

Unnamed: 0,id,date_completed,final_route_surveyed_code,final_route_surveyed,direction,mode,purpose,resident_or_visitor_code,home_or_hotel_address,home_or_hotel_city,...,FirstOn_lat,FirstOn_lon,LastOff_lat,LastOff_lon,p_Stop_lat,p_Stop_lon,a_Stop_lat,a_Stop_lon,unlinked_weight_adj,linked_weight_adj
3,30.0,9-Sep-19,SLC_1_703_00,TRAX RED LINE TO DAYBREAK PARKWAY STATION,0.0,1.0,2.0,1.0,262 E Whitlock Ave,South Salt Lake,...,40.724091,-111.896892,40.280140,-111.725489,40.724091,-111.896892,40.280140,-111.725489,11.440,5.720
5,32.0,9-Sep-19,SLC_1_701_00,TRAX BLUE LINE TO DRAPER TOWN CENTER STATION,0.0,1.0,1.0,1.0,10404 Avondale Dr,Cedar Hills,...,40.374774,-111.820649,40.767452,-111.891101,40.374774,-111.820649,40.767452,-111.891101,18.136,9.068
17,61.0,9-Sep-19,SLC_1_701_01,TRAX BLUE LINE TO SALT LAKE CENTRAL STATION,1.0,1.0,4.0,2.0,10333 S Jordan Gateway,South Jordan,...,40.563155,-111.900753,40.769344,-111.901146,40.563155,-111.900753,40.769344,-111.901146,3.265,1.633
46,212.0,11-Sep-19,SLC_1_701_01,TRAX BLUE LINE TO SALT LAKE CENTRAL STATION,1.0,1.0,1.0,1.0,W Coyote Cir,Saratoga Springs,...,40.425196,-111.896354,40.763855,-111.891071,40.425196,-111.896354,40.763855,-111.891071,9.312,4.656
58,236.0,11-Sep-19,SLC_1_704_01,TRAX GREEN LINE TO AIRPORT STATION,1.0,1.0,3.0,1.0,622 23rd St,Ogden,...,41.224285,-111.980631,40.783280,-111.980172,41.224285,-111.980631,40.783280,-111.980172,7.603,3.801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13403,22722.0,11-Dec-19,SLC_1_750_00,FRONTRUNNER TO OGDEN STATION,0.0,2.0,1.0,1.0,641 1850 N St,Ogden,...,40.763855,-111.891071,41.224285,-111.980631,41.224285,-111.980631,40.763855,-111.891071,11.951,5.976
13409,22737.0,11-Dec-19,SLC_1_750_00,FRONTRUNNER TO OGDEN STATION,0.0,2.0,1.0,1.0,185 E 1050 N,Bountiful,...,40.768217,-111.891081,40.880457,-111.903151,40.880457,-111.903151,40.768217,-111.891081,17.630,8.815
13410,22738.0,11-Dec-19,SLC_1_750_00,FRONTRUNNER TO OGDEN STATION,0.0,2.0,1.0,1.0,2803 W 1475 N,Layton,...,40.768217,-111.891081,41.094769,-112.013807,41.094769,-112.013807,40.768217,-111.891081,11.951,5.976
13414,22744.0,11-Dec-19,SLC_1_750_00,FRONTRUNNER TO OGDEN STATION,0.0,2.0,1.0,1.0,W 4800 S,Roy,...,40.772532,-111.905124,41.188757,-112.039378,41.188757,-112.039378,40.772532,-111.905124,11.238,11.238


In [19]:
# Sanity check: the total should be around 20,000 (drive to CRT around 15,000)
obs_crt_df['linked_weight_adj'].sum()

20236.371999999996

In [20]:
# Latitude / longitude column names of every boarding and alighting stop field,
# held column-wise: row i of 'stop_lat' pairs with row i of 'stop_lon'
stop_fields_lat_lon_df = pd.DataFrame({
    'stop_lat': [
        'stop_on_lat',
        'stop_off_lat',
        'prev_tran_1_on_bus_lat',
        'prev_tran_1_off_bus_lat',
        'prev_tran_2_on_bus_lat',
        'prev_tran_2_off_bus_lat',
        'prev_tran_3_on_bus_lat',
        'prev_tran_3_off_bus_lat',
        'prev_tran_4_on_bus_lat',
        'prev_tran_4_off_bus_lat',
        'next_tran_1_on_bus_lat',
        'next_tran_1_off_bus_lat',
        'next_tran_2_on_bus_lat',
        'next_tran_2_off_bus_lat',
        'next_tran_3_on_bus_lat',
        'next_tran_3_off_bus_lat',
        'next_tran_4_on_bus_lat',
        'next_tran_4_off_bus_lat',
    ],
    'stop_lon': [
        'stop_on_long',
        'stop_off_long',
        'prev_tran_1_on_bus_long',
        'prev_tran_1_off_bus_long',
        'prev_tran_2_on_bus_long',
        'prev_tran_2_off_bus_long',
        'prev_tran_3_on_bus_long',
        'prev_tran_3_off_bus_long',
        'prev_tran_4_on_bus_long',
        'prev_tran_4_off_bus_long',
        'next_tran_1_on_bus_long',
        'next_tran_1_off_bus_long',
        'next_tran_2_on_bus_long',
        'next_tran_2_off_bus_long',
        'next_tran_3_on_bus_long',
        'next_tran_3_off_bus_long',
        'next_tran_4_on_bus_long',
        'next_tran_4_off_bus_long',
    ],
})
stop_fields_lat_lon_df

Unnamed: 0,stop_lat,stop_lon
0,stop_on_lat,stop_on_long
1,stop_off_lat,stop_off_long
2,prev_tran_1_on_bus_lat,prev_tran_1_on_bus_long
3,prev_tran_1_off_bus_lat,prev_tran_1_off_bus_long
4,prev_tran_2_on_bus_lat,prev_tran_2_on_bus_long
5,prev_tran_2_off_bus_lat,prev_tran_2_off_bus_long
6,prev_tran_3_on_bus_lat,prev_tran_3_on_bus_long
7,prev_tran_3_off_bus_lat,prev_tran_3_off_bus_long
8,prev_tran_4_on_bus_lat,prev_tran_4_on_bus_long
9,prev_tran_4_off_bus_lat,prev_tran_4_off_bus_long


In [21]:
# List every column available in the CRT survey subset
list(obs_crt_df.columns)

['id',
 'date_completed',
 'final_route_surveyed_code',
 'final_route_surveyed',
 'direction',
 'mode',
 'purpose',
 'resident_or_visitor_code',
 'home_or_hotel_address',
 'home_or_hotel_city',
 'home_or_hotel_state',
 'home_or_hotel_zip',
 'home_or_hotel_lat',
 'home_or_hotel_long',
 'final_origin_place_type_code',
 'origin_address',
 'origin_city',
 'origin_state',
 'origin_zip',
 'origin_lat',
 'origin_long',
 'prev_transfers_code',
 'trip_first_route_code',
 'trip_first_route',
 'trip_first_route_other',
 'trip_second_route_code',
 'trip_second_route',
 'trip_second_route_other',
 'trip_third_route_code',
 'trip_third_route',
 'trip_third_route_other',
 'trip_fourth_route_code',
 'trip_fourth_route',
 'trip_fourth_route_other',
 'final_origin_transport_code',
 'final_destin_place_type_code',
 'destin_address',
 'destin_city',
 'destin_state',
 'destin_zip',
 'destin_lat',
 'destin_long',
 'next_transfers_code',
 'trip_next_route_code',
 'trip_next_route',
 'trip_next_route_other',


In [22]:
# Survey columns carried through the station matching
filtered_obs_fields = [
    'id',
    'Purp5_text',
    'PK_OK',
    'Ac_Mode_Model',
    'Linked_Mode_txt',
    'linked_weight_adj',
]
filtered_obs_fields

['id',
 'Purp5_text',
 'PK_OK',
 'Ac_Mode_Model',
 'Linked_Mode_txt',
 'linked_weight_adj']

In [23]:
# Find every CRT station touched by each survey record: each stop lat/lon column pair
# is matched exactly against the station coordinates (inner join, so stops that are
# not CRT stations drop out).
_station_match_columns = filtered_obs_fields + ['STATION_ID']
_station_match_frames = []

for stop_lat_field, stop_lon_field in zip(stop_fields_lat_lon_df['stop_lat'], stop_fields_lat_lon_df['stop_lon']):

    # get station name
    _df = pd.merge(obs_crt_df, crt_station_obs_names_df, left_on=[stop_lat_field, stop_lon_field], right_on=['LAT','LON'])

    _station_match_frames.append(_df[_station_match_columns])

# Concatenate once at the end instead of growing a DataFrame inside the loop
# (repeated concat re-copies all earlier rows and starts from an empty frame).
if _station_match_frames:
    obs_crt_stations_df = pd.concat(_station_match_frames)
else:
    obs_crt_stations_df = pd.DataFrame(columns=_station_match_columns)

display(obs_crt_stations_df)

Unnamed: 0,id,Purp5_text,PK_OK,Ac_Mode_Model,Linked_Mode_txt,linked_weight_adj,STATION_ID
0,32.0,HBW,PK,Drive,CRT,9.068,08-SLCn
1,61.0,NHB,OK,Walk,CRT,1.633,09-Murr
2,338.0,HBC,PK,Drive,CRT,1.697,09-Murr
3,430.0,HBO,OK,Walk,CRT,1.573,07-NTmp
4,434.0,HBW,OK,Drive,CRT,2.360,07-NTmp
...,...,...,...,...,...,...,...
29,18561.0,HBW,OK,Walk,CRT,6.300,11-Drap
30,19897.0,HBW,PK,Drive,CRT,5.046,02-Roy
31,21157.0,NHB,PK,Walk,CRT,7.784,05-Frmg
0,11872.0,HBW,OK,Drive,CRT,2.589,07-NTmp


In [24]:
# A record can match the same station through several stop fields; keep each match once
obs_crt_stations_unique_df = obs_crt_stations_df.drop_duplicates()

# One row per survey record, holding the sorted list of its station labels
grouped_df = (
    obs_crt_stations_unique_df
    .groupby(
        ['id', 'linked_weight_adj', 'Purp5_text', 'PK_OK', 'Ac_Mode_Model', 'Linked_Mode_txt'],
        as_index=False,
    )
    .agg({'STATION_ID': lambda station_ids: sorted(station_ids)})
)

# First and second label of each sorted list (None when the list is too short)
grouped_df['STATION_ID_1'] = grouped_df['STATION_ID'].map(
    lambda station_ids: station_ids[0] if station_ids else None
)
grouped_df['STATION_ID_2'] = grouped_df['STATION_ID'].map(
    lambda station_ids: None if len(station_ids) <= 1 else station_ids[1]
)

# The list column is no longer needed
grouped_df = grouped_df.drop(columns=['STATION_ID'])

In [25]:
# Reshape the survey records to the same layout as the model results:
# mode = lower-cased first letter of the access mode + linked mode (e.g. 'd' + 'CRT')
_df = (
    grouped_df
    .assign(
        mode=grouped_df['Ac_Mode_Model'].str.slice(0, 1).str.lower() + grouped_df['Linked_Mode_txt'],
        model='On-Board Survey',
    )
    .rename(columns={'Purp5_text': 'purpose', 'PK_OK': 'period'})
)

# Sum the linked weights per station pair, purpose, period and mode
obs_results_CRT_df = (
    _df
    .groupby(['model', 'STATION_ID_1', 'STATION_ID_2', 'purpose', 'period', 'mode'], as_index=False)
    .agg(trips_total=('linked_weight_adj', 'sum'))
)
obs_results_CRT_df

Unnamed: 0,model,STATION_ID_1,STATION_ID_2,purpose,period,mode,trips_total
0,On-Board Survey,01-Ogdn,02-Roy,HBC,PK,dCRT,11.502
1,On-Board Survey,01-Ogdn,02-Roy,HBO,PK,wCRT,21.355
2,On-Board Survey,01-Ogdn,02-Roy,HBW,OK,wCRT,21.668
3,On-Board Survey,01-Ogdn,02-Roy,HBW,PK,dCRT,4.021
4,On-Board Survey,01-Ogdn,02-Roy,HBW,PK,wCRT,6.670
...,...,...,...,...,...,...,...
726,On-Board Survey,14-Orem,15-Prov,HBO,PK,dCRT,50.665
727,On-Board Survey,14-Orem,15-Prov,HBW,OK,dCRT,11.426
728,On-Board Survey,14-Orem,15-Prov,HBW,OK,wCRT,16.298
729,On-Board Survey,14-Orem,15-Prov,HBW,PK,wCRT,33.529


# Combine and Compare

In [26]:
# Stack the model and on-board-survey station-pair totals into one long table.
# The index is not reset, so row labels from the two inputs repeat.
results_CRT_df = pd.concat([model_results_CRT_df,obs_results_CRT_df])
results_CRT_df

Unnamed: 0,model,STATION_ID_1,STATION_ID_2,purpose,period,mode,trips_total
0,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,dCRT,9.6189
1,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,wCRT,6.6417
2,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,dCRT,3.3084
3,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,wCRT,47.7483
4,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,PK,dCRT,3.0872
...,...,...,...,...,...,...,...
726,On-Board Survey,14-Orem,15-Prov,HBO,PK,dCRT,50.6650
727,On-Board Survey,14-Orem,15-Prov,HBW,OK,dCRT,11.4260
728,On-Board Survey,14-Orem,15-Prov,HBW,OK,wCRT,16.2980
729,On-Board Survey,14-Orem,15-Prov,HBW,PK,wCRT,33.5290


In [27]:
# Totals by model (rows) and access mode (columns), plus a combined column
check_df = (
    results_CRT_df
    .groupby(['model', 'mode'], as_index=False)
    .agg(trips_total=('trips_total', 'sum'))
    .pivot(index='model', columns='mode', values='trips_total')
)
check_df['ALL'] = check_df['dCRT'].add(check_df['wCRT'])
check_df

mode,dCRT,wCRT,ALL
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1-TDM-Recalib,13409.0332,6442.6235,19851.6567
2-IVT-Test,13432.8017,6484.0125,19916.8142
3-IVT-Test-2,13414.3502,6451.3138,19865.664
4-ASC-Bin-By-Distance,13426.8838,6438.1084,19864.9922
5-ASC-Bin-By-Distance-wIvtTest,13499.0891,6449.9355,19949.0246
On-Board Survey,14379.261,5615.716,19994.977


In [28]:
# Station-pair labels to exclude because they don't line up
remove_df = pd.DataFrame.from_records(
    [
        ('03-Clrf, 06-WC', '08-SLCn, 11-Drap'),
        ('03-Clrf, 08-SLCn', '06-WC, 11-Drap'),
        ('03-Clrf', '06-WC'),
        ('03-Clrf', '08-SLCn'),
        ('08-SLCn', '11-Drap'),
        ('06-WC', '11-Drap'),
    ],
    columns=['STATION_ID_1', 'STATION_ID_2'],
)

# Anti-join: tag each result row with whether it also appears in remove_df ...
_df_filtered = results_CRT_df.merge(
    remove_df,
    how='left',
    on=['STATION_ID_1', 'STATION_ID_2'],
    indicator=True,
)

# ... then keep only the rows found on the left side alone
found_only_in_results = _df_filtered['_merge'].eq('left_only')
_df_filtered = _df_filtered.loc[found_only_in_results].drop(columns='_merge')

# Display result
display(_df_filtered)

Unnamed: 0,model,STATION_ID_1,STATION_ID_2,purpose,period,mode,trips_total
0,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,dCRT,9.6189
1,1-TDM-Recalib,01-Ogdn,02-Roy,HBC,PK,wCRT,6.6417
2,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,dCRT,3.3084
3,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,OK,wCRT,47.7483
4,1-TDM-Recalib,01-Ogdn,02-Roy,HBO,PK,dCRT,3.0872
...,...,...,...,...,...,...,...
7608,On-Board Survey,14-Orem,15-Prov,HBO,PK,dCRT,50.6650
7609,On-Board Survey,14-Orem,15-Prov,HBW,OK,dCRT,11.4260
7610,On-Board Survey,14-Orem,15-Prov,HBW,OK,wCRT,16.2980
7611,On-Board Survey,14-Orem,15-Prov,HBW,PK,wCRT,33.5290


In [29]:
# export to csv (path is relative to the notebook's working directory;
# to_csv does not create the 'station-pairs-shiny' folder if it is missing)
_df_filtered.to_csv('station-pairs-shiny/station-pairs-data.csv', index=False)

In [33]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

# Widgets: choices come from the distinct values present in the filtered results
purpose_options, period_options, mode_options, model_options = (
    list(_df_filtered[column_name].unique())
    for column_name in ('purpose', 'period', 'mode', 'model')
)

# Multi-select filters start with every value selected
purpose_select = widgets.SelectMultiple(
    options=purpose_options,
    value=purpose_options,
    description="Purpose",
    rows=4,
)
period_select = widgets.SelectMultiple(
    options=period_options,
    value=period_options,
    description="Period",
    rows=2,
)
mode_select = widgets.SelectMultiple(
    options=mode_options,
    value=mode_options,
    description="Mode",
    rows=2,
)

# Primary model, and an optional second model ('None' = no comparison selected)
model_1_dropdown = widgets.Dropdown(
    options=model_options,
    value=model_options[0],
    description="Select Model:",
)
model_2_dropdown = widgets.Dropdown(
    options=['None', *model_options],
    value="None",
    description="Compare to:",
)

# Checkbox to switch between absolute and percent difference
diff_type_checkbox = widgets.Checkbox(description="Show Percent Difference", value=False)

# Checkbox to show each value as a share of the total
shares_checkbox = widgets.Checkbox(description="Show Share of Total", value=False)

# Output widget
output = widgets.Output()

# Function to filter _df_filtered and display pivot table
def _color_ramp(val):
    """Return the CSS background for one formatted difference cell.

    `val` is parsed as an integer after stripping '%' and ','. Anything that
    does not parse (blank cells, station labels) gets no styling. Negative
    values shade towards blue, positive values towards red, and both fade to
    white as the value approaches zero.
    """
    try:
        num_val = int(str(val).replace('%', '').replace(',', ''))
    except ValueError:
        return ''  # Not a number: leave the cell unstyled

    # Cap at +/-1000 so extreme values do not stretch the scale
    norm_val = max(min(num_val, 1000), -1000)

    # Power-law scaling (exponent 0.7) so small values stay close to white
    scale_factor = (abs(norm_val) / 1000) ** 0.7
    fade = int((1 - scale_factor) * 255)

    if norm_val < 0:
        color = f'rgba({fade}, {fade}, 255, 0.9)'  # Blue fading to white
    elif norm_val > 0:
        color = f'rgba(255, {fade}, {fade}, 0.9)'  # Red fading to white
    else:
        color = "white"  # Keep zero values pure white

    return f'background-color: {color};'


def _station_totals(pair_totals, value_col):
    """Sum `value_col` per station over both pair columns and append an 'ALL' row.

    A pair contributes to the station in STATION_ID_1 and to the station in
    STATION_ID_2, whereas the 'ALL' row is the plain sum over pairs.
    """
    per_end = [
        pair_totals.groupby(end_col, as_index=False)[value_col].sum()
        .rename(columns={end_col: 'STATION_ID'})
        for end_col in ('STATION_ID_1', 'STATION_ID_2')
    ]
    # A station can show up in both pair columns, so sum its two subtotals
    per_station = pd.concat(per_end).groupby('STATION_ID', as_index=False)[value_col].sum()
    all_row = pd.DataFrame({'STATION_ID': ['ALL'], value_col: [pair_totals[value_col].sum()]})
    return pd.concat([per_station, all_row], ignore_index=True)


def _share_percent(part, whole):
    """Return part / whole * 100, using 0 wherever `whole` is 0."""
    return (part / whole.where(whole != 0) * 100).fillna(0)


def _percent_diff(merged):
    """Percent change of trips_total_m1 relative to trips_total_m2 (0 where m2 is 0)."""
    base = merged['trips_total_m2']
    return ((merged['trips_total_m1'] - base) / base.where(base != 0) * 100).fillna(0)


def _model_tables(data, purpose, period, mode, model, show_share):
    """Build (pair_totals, station_totals) for one model.

    Both frames carry a `trips_total` column. With `show_share` the values are
    percentages of the same model's totals across all purposes, periods and
    modes; otherwise they are the summed trips of the current selection.
    """
    pair_keys = ['STATION_ID_1', 'STATION_ID_2']
    model_rows = data[data['model'] == model]
    selected = model_rows[
        model_rows['purpose'].isin(purpose)
        & model_rows['period'].isin(period)
        & model_rows['mode'].isin(mode)
    ]

    pair_totals = selected.groupby(pair_keys, as_index=False).agg(trips_total=('trips_total', 'sum'))
    station_totals = _station_totals(pair_totals, 'trips_total')
    if not show_share:
        return pair_totals, station_totals

    # Denominators: the model's trips with no purpose/period/mode filter
    pair_base = model_rows.groupby(pair_keys, as_index=False).agg(trips_total_divide=('trips_total', 'sum'))
    station_base = _station_totals(pair_base, 'trips_total_divide')

    pair_totals = pd.merge(pair_totals, pair_base, on=pair_keys)
    pair_totals['trips_total'] = _share_percent(pair_totals['trips_total'], pair_totals['trips_total_divide'])
    # .copy() so later column assignments never write into a slice
    pair_totals = pair_totals[pair_keys + ['trips_total']].copy()

    station_totals = pd.merge(station_totals, station_base, on=['STATION_ID'])
    station_totals['trips_total'] = _share_percent(station_totals['trips_total'], station_totals['trips_total_divide'])
    station_totals = station_totals[['STATION_ID', 'trips_total']].copy()

    return pair_totals, station_totals


def _int_formatter(as_percent, blank_zero):
    """Return a cell formatter: 'N%' or comma-grouped 'N', optionally '' for zero."""
    def fmt(x):
        if blank_zero and x == 0:
            return ""
        return f"{x}%" if as_percent else f"{x:,}"
    return fmt


def _int_pivot(pairs, value_col):
    """Pivot pairs into a STATION_ID_1 x STATION_ID_2 grid of ints (missing -> 0)."""
    return pairs.pivot(
        index='STATION_ID_1',
        columns='STATION_ID_2',
        values=value_col
    ).fillna(0).astype(int)


def update_table(purpose, period, mode, model_1, model_2, show_percent, show_share):
    """Render the station-pair matrix and the per-station totals side by side.

    Parameters
    ----------
    purpose, period, mode : iterables of the values to keep in `_df_filtered`.
    model_1 : model whose trips are shown.
    model_2 : model to compare against, or the string 'None' for no comparison.
        With a comparison the tables show model_1 minus model_2, colour-coded.
    show_percent : show the difference as a percent of model_2 instead of an
        absolute difference (only used when model_2 is set).
    show_share : show each value as a percentage of the model's unfiltered
        total instead of raw trips.

    Notes
    -----
    * While the module-level flag `firstTime` is False the call renders nothing
      and only sets the flag to True; every later call renders.
    * Percent difference combined with shares is not computed: both tables are
      shown empty for that combination.
    * Values are cast with `astype(int)` before display, which truncates
      towards zero.
    """
    global firstTime

    output.clear_output()  # Clear previous output before displaying new content
    if not firstTime:
        firstTime = True
        return

    pairs_1, stations_1 = _model_tables(_df_filtered, purpose, period, mode, model_1, show_share)

    if model_2 != 'None':
        pairs_2, stations_2 = _model_tables(_df_filtered, purpose, period, mode, model_2, show_share)

        # Outer-merge so pairs/stations present in only one model still appear;
        # the missing side counts as 0 in the subtraction
        merged_data = pd.merge(
            pairs_1,
            pairs_2,
            on=['STATION_ID_1', 'STATION_ID_2'],
            suffixes=('_m1', '_m2'),
            how='outer'
        ).fillna(0)
        merged_data_side = pd.merge(
            stations_1,
            stations_2,
            on=['STATION_ID'],
            suffixes=('_m1', '_m2'),
            how='outer'
        ).fillna(0)

        if show_percent and show_share:
            # Percent difference of shares is not supported: show empty tables
            pivot = pd.DataFrame()
            side = pd.DataFrame()
        else:
            if show_percent:
                merged_data['trips_total_diff'] = _percent_diff(merged_data)
                merged_data_side['trips_total_diff'] = _percent_diff(merged_data_side)
            else:
                merged_data['trips_total_diff'] = merged_data['trips_total_m1'] - merged_data['trips_total_m2']
                merged_data_side['trips_total_diff'] = merged_data_side['trips_total_m1'] - merged_data_side['trips_total_m2']

            # Percent differences and share differences both carry a '%' suffix;
            # absolute trip differences get thousands separators instead
            as_percent = show_percent or show_share

            # Zero cells are blanked in the matrix but kept in the side table
            pivot = _int_pivot(merged_data, 'trips_total_diff').map(_int_formatter(as_percent, blank_zero=True))
            side = merged_data_side[['STATION_ID']].assign(
                trips_total_diff=merged_data_side['trips_total_diff'].fillna(0).astype(int).map(
                    _int_formatter(as_percent, blank_zero=False)
                )
            )

        # Apply conditional formatting
        pivot_table = pivot.style.map(_color_ramp)
        side_table = side.style.map(_color_ramp).hide(axis="index")
    else:
        # Single model: plain tables without colour coding
        pivot_table = _int_pivot(pairs_1, 'trips_total').map(_int_formatter(show_share, blank_zero=True))
        # Side-table zeros are blanked for raw trips but shown as '0%' for shares
        side = stations_1.assign(
            trips_total=stations_1['trips_total'].astype(int).map(
                _int_formatter(show_share, blank_zero=not show_share)
            )
        )
        side_table = side.style.hide(axis="index")

    # Render each table into its own Output widget so they can sit side by side
    pivot_output = widgets.Output()
    total_stations_output = widgets.Output()

    with pivot_output:
        display(pivot_table)

    with total_stations_output:
        display(side_table)

    # Display both tables in an HBox (side by side)
    display(widgets.HBox([pivot_output, total_stations_output]))

# update_table renders nothing while this flag is False and flips it to True,
# so it has to be reset before the callback is wired up below.
firstTime = False

# Control panel: model pickers | purpose | period + mode | display toggles
vbox1 = widgets.VBox(children=[model_1_dropdown, model_2_dropdown])
vbox2 = widgets.VBox(children=[period_select, mode_select])
vbox3 = widgets.VBox(children=[diff_type_checkbox, shares_checkbox])
hbox = widgets.HBox(children=[vbox1, purpose_select, vbox2, vbox3])

# Bind every control to the matching update_table argument; the callback is
# re-run whenever one of them changes.
out = widgets.interactive_output(
    update_table,
    dict(
        purpose=purpose_select,
        period=period_select,
        mode=mode_select,
        model_1=model_1_dropdown,
        model_2=model_2_dropdown,
        show_percent=diff_type_checkbox,
        show_share=shares_checkbox,
    ),
)

# Controls on top, then the interactive tables, then the stand-alone output area
display(hbox, out, output)



HBox(children=(VBox(children=(Dropdown(description='Select Model:', options=('1-TDM-Recalib', '2-IVT-Test', '3…

Output()

Output()