In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.options.display.float_format = '{:,.3f}'.format

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
ctramp_dir = params['ctramp_dir']
model_outputs_dir = params['model_dir']
summary_dir = params['summary_dir']
concept_id = params['concept_id']
preprocess_dir = _join(ctramp_dir, '_pre_process_files', 'perceived_tt')
perf_measure_columns = params['final_columns']
model_year = params['model_year']
filename_extension = params['filename_extension']
hwy_skims_dir = _join(model_outputs_dir, r'skims\highway' )

skims_dir = _join(model_outputs_dir, r'skims')

In [4]:
Path(summary_dir).mkdir(parents=True, exist_ok=True)
Path(preprocess_dir).mkdir(parents=True, exist_ok=True)

In [5]:
purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

### Calculate the taxi wait time for each origin zone

In [5]:
taz = pd.read_csv(_join(ctramp_dir, 'landuse', 'tazData_' + str(model_year) + '.csv'))
taz['popEmpSqMile'] = (taz['TOTPOP'] + taz['TOTEMP']) / (taz['TOTACRE'] * 0.0015625)

In [6]:
%%time
taz = taz[['ZONE', 'popEmpSqMile']]

# TNC 
#TNC_single_waitTime_mean =  10.3,8.5,8.4,6.3,3.0
#TNC_single_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#TNC_shared_waitTime_mean =  15.0,15.0,11.0,8.0,5.0
#TNC_shared_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#Taxi_waitTime_mean = 26.5,17.3,13.3,9.5,5.5
#Taxi_waitTime_sd =    6.4,6.4,6.4,6.4,6.4

#WaitTimeDistribution_EndPopEmpPerSqMi = 500,2000,5000,15000,9999999999

#TO DO: Ask John which wait time to use
taz['density_group'] = pd.cut(taz['popEmpSqMile'], bins= [-1, 500,2000,5000,15000,9999999999], 
                              labels=[10.3,8.5,8.4,6.3,3.0], ordered=False)
#taz['density_group'] = taz['density_group'].fillna(0)
taz['density_group'] =taz['density_group'].astype("int64")

taz = taz.sort_values('ZONE')
taxi_wait_time = np.repeat(taz['density_group'].values, len(taz)).reshape(len(taz), len(taz))

Wall time: 58.4 ms


### Load all the data from Skims 

In [7]:
%%time
# The data tab of the UEC file lists all the matrix cores and location an matrix files of skims
# 1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

# extract the file names, matrix cores 
matrix_df = pd.read_excel(_join(params['common_dir'], r"TripModeChoice_Updated.xlsx"), sheet_name='data')
matrix_df = matrix_df.iloc[9:]
matrix_df.columns = ['no', 'token', 'format', 'file','matrix', 'group', 'index']
#matrix_df[1:5]

# pre-processing
matrix_df['matrix_files'] = matrix_df['file'].str.replace('skims/', '')
matrix_df['path'] = 'skims'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('nonmot')==True, 'path'] = 'active'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('trnskm')==True, 'path'] = 'transit'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('hwyskm')==True, 'path'] = 'highway'
#matrix_df[1:5]

# Iterate over the DataFrame rows
for _, row in matrix_df.iterrows():
    variable_name = row['token']
    file_path = row['path']
    filename = row['matrix_files']
    matrix_cr = row['matrix']
    
    # Extract the variable name and index (if present)
    if '[' in variable_name:
        name_start = variable_name.index('[')
        name_end = variable_name.index(']')
        index = int(variable_name[name_start+1:name_end])
        variable_name = variable_name[:name_start]
    else:
        index=None
    
    # Read the file using numpy.load() and assign it to the variable with the specified index
    file = omx.open_file(_join(skims_dir, filename))
    file_contents = np.array(file[matrix_cr])
    print(variable_name,index, _join(skims_dir, filename), file_contents.sum(), file_contents.min(), file_contents.max())
    if '[' in row['token']:
        if variable_name in locals() and isinstance(locals()[variable_name], np.ndarray):
            arr = locals()[variable_name]
            if index >= len(arr):
                # Resize the array if the index is out of bounds
                new_arr = np.resize(arr, index + 1)
                new_arr[index] = file_contents
                locals()[variable_name] = new_arr
            else:
                arr[index] = file_contents
        else:
            arr = np.empty(index + 1, dtype=object)
            arr[index] = file_contents
            locals()[variable_name] = arr
    else:
        locals()[variable_name] = file_contents

DISTWALK None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\nonmotskm.omx 10796411595951.248 0.03142102696417673 1000000.0
DISTBIKE None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\nonmotskm.omx 8910510616075.47 0.018911417199612486 1000000.0
SOV_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\hwyskmEA.omx 13965190000.0 0.11402832 1000000.0
SOV_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\hwyskmEA.omx 13847476000.0 0.035375483 1000000.0
SOV_BTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\hwyskmEA.omx 800814500.0 0.0 472.0
SOV_VTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\hwyskmEA.omx 740758100.0 0.0 334.74524
HOV2_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\hwyskmEA.omx 13957872000.0 0.11402832 1000000.0
HOV2_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skim

WLK_TRN_WLK_IVT_LOC 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 14543465000.0 0.0 21880.174
WLK_TRN_WLK_IVT_EXP 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 13815339000.0 0.0 27026.084
WLK_TRN_WLK_IVT_LRT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 2572853500.0 0.0 8251.378
WLK_TRN_WLK_IVT_FRY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 87460290.0 0.0 3594.0486
WLK_TRN_WLK_IVT_HVY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 30492733000.0 0.0 12912.519
WLK_TRN_WLK_IVT_COM 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 10755655000.0 0.0 16536.756
WLK_TRN_WLK_FAR 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_WLK.omx 5018942000.0 0.0 1631.

WLK_TRN_WLK_IVT_LRT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 2080319200.0 0.0 7640.458
WLK_TRN_WLK_IVT_FRY 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 4136954400.0 0.0 14029.791
WLK_TRN_WLK_IVT_HVY 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 29584966000.0 0.0 13322.794
WLK_TRN_WLK_IVT_COM 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 11965979000.0 0.0 16475.254
WLK_TRN_WLK_FAR 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 5055170000.0 0.0 1631.282
WLK_TRN_WLK_WAUX 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 24727175000.0 0.0 90206.86
WLK_TRN_WLK_IWAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_WLK.omx 9898355000.0 0.0 9000.0
WLK_

PNR_TRN_WLK_TOTIVT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 66315633000.0 0.0 34532.05
PNR_TRN_WLK_CROWD 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 3748534300.0 0.0 2744.585
PNR_TRN_WLK_IVT_LOC 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 11143452000.0 0.0 16931.008
PNR_TRN_WLK_IVT_EXP 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 5544131600.0 0.0 16301.048
PNR_TRN_WLK_IVT_LRT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 2373068500.0 0.0 8263.789
PNR_TRN_WLK_IVT_FRY 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 4267362300.0 0.0 13456.629
PNR_TRN_WLK_IVT_HVY 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_PNR_TRN_WLK.omx 30589530000.0 0.0 13942

KNR_TRN_WLK_DDIST 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmAM_KNR_TRN_WLK.omx 10516116000.0 0.0 13541.76
KNR_TRN_WLK_WAUX 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmAM_KNR_TRN_WLK.omx 5729775000.0 0.0 13679.823
KNR_TRN_WLK_IWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmAM_KNR_TRN_WLK.omx 7486531000.0 0.0 14628.0
KNR_TRN_WLK_XWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmAM_KNR_TRN_WLK.omx 3560811800.0 0.0 12433.801
KNR_TRN_WLK_BOARDS 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmAM_KNR_TRN_WLK.omx 16843504.0 0.0 7.096426
KNR_TRN_WLK_WAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_KNR_TRN_WLK.omx 10602806000.0 0.0 15000.0
KNR_TRN_WLK_TOTIVT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_KNR_TRN_WLK.omx 48732533000.0 0.0 31996.783
KNR_TRN_WLK

WLK_TRN_PNR_IVT_FRY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 17032348.0 0.0 3594.0486
WLK_TRN_PNR_IVT_HVY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 26333917000.0 0.0 12912.519
WLK_TRN_PNR_IVT_COM 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 7940736000.0 0.0 16536.756
WLK_TRN_PNR_FAR 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 4220957400.0 0.0 1512.8937
WLK_TRN_PNR_DTIM 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx -6.653234e+28 -4.354167e+22 16051.4375
WLK_TRN_PNR_DDIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 10150680000.0 0.0 13237.568
WLK_TRN_PNR_WAUX 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEA_WLK_TRN_PNR.omx 3849469200.0 0.0 42

WLK_TRN_PNR_WAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 19190446000.0 0.0 18309.602
WLK_TRN_PNR_TOTIVT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 54061126000.0 0.0 33111.746
WLK_TRN_PNR_CROWD 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 864440300.0 0.0 2591.637
WLK_TRN_PNR_IVT_LOC 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 10404224000.0 0.0 10824.764
WLK_TRN_PNR_IVT_EXP 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 5161590000.0 0.0 14251.668
WLK_TRN_PNR_IVT_LRT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 1323272600.0 0.0 7320.2
WLK_TRN_PNR_IVT_FRY 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmEV_WLK_TRN_PNR.omx 2758886700.0 0.0 12575.088


WLK_TRN_KNR_DTIM 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx -3.3783567e+28 -4.3541666e+22 18294.002
WLK_TRN_KNR_DDIST 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx 13857707000.0 0.0 13667.499
WLK_TRN_KNR_WAUX 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx 2758322000.0 0.0 47402.938
WLK_TRN_KNR_IWAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx 9374633000.0 0.0 15000.0
WLK_TRN_KNR_XWAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx 3256038700.0 0.0 10863.635
WLK_TRN_KNR_BOARDS 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmMD_WLK_TRN_KNR.omx 16974178.0 0.0 6.5
WLK_TRN_KNR_WAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\trnskmPM_WLK_TRN_KNR.omx 13682957000.0 0.0 18016.24
WLK_

In [8]:
# change th 1000000.0 values in DISTWALK and DISTBIKE to 0
DISTWALK = np.where(DISTWALK == 1000000.0, 0, DISTWALK)
DISTBIKE = np.where(DISTBIKE == 1000000.0, 0, DISTBIKE)
PNR_TRN_WLK_DTIM[1] =  np.where(PNR_TRN_WLK_DTIM[1] < 0, 0, PNR_TRN_WLK_DTIM[1])
PNR_TRN_WLK_DTIM[2] =  np.where(PNR_TRN_WLK_DTIM[2] < 0, 0, PNR_TRN_WLK_DTIM[2])
PNR_TRN_WLK_DTIM[3] =  np.where(PNR_TRN_WLK_DTIM[3] < 0, 0, PNR_TRN_WLK_DTIM[3])
PNR_TRN_WLK_DTIM[4] =  np.where(PNR_TRN_WLK_DTIM[4] < 0, 0, PNR_TRN_WLK_DTIM[4])
PNR_TRN_WLK_DTIM[5] =  np.where(PNR_TRN_WLK_DTIM[5] < 0, 0, PNR_TRN_WLK_DTIM[5])

KNR_TRN_WLK_DTIM[1] =  np.where(KNR_TRN_WLK_DTIM[1] < 0, 0, KNR_TRN_WLK_DTIM[1])
KNR_TRN_WLK_DTIM[2] =  np.where(KNR_TRN_WLK_DTIM[2] < 0, 0, KNR_TRN_WLK_DTIM[2])
KNR_TRN_WLK_DTIM[3] =  np.where(KNR_TRN_WLK_DTIM[3] < 0, 0, KNR_TRN_WLK_DTIM[3])
KNR_TRN_WLK_DTIM[4] =  np.where(KNR_TRN_WLK_DTIM[4] < 0, 0, KNR_TRN_WLK_DTIM[4])
KNR_TRN_WLK_DTIM[5] =  np.where(KNR_TRN_WLK_DTIM[5] < 0, 0, KNR_TRN_WLK_DTIM[5])

WLK_TRN_PNR_DTIM[1] =  np.where(WLK_TRN_PNR_DTIM[1] < 0, 0, WLK_TRN_PNR_DTIM[1])
WLK_TRN_PNR_DTIM[2] =  np.where(WLK_TRN_PNR_DTIM[2] < 0, 0, WLK_TRN_PNR_DTIM[2])
WLK_TRN_PNR_DTIM[3] =  np.where(WLK_TRN_PNR_DTIM[3] < 0, 0, WLK_TRN_PNR_DTIM[3])
WLK_TRN_PNR_DTIM[4] =  np.where(WLK_TRN_PNR_DTIM[4] < 0, 0, WLK_TRN_PNR_DTIM[4])
WLK_TRN_PNR_DTIM[5] =  np.where(WLK_TRN_PNR_DTIM[5] < 0, 0, WLK_TRN_PNR_DTIM[5])

WLK_TRN_KNR_DTIM[1] =  np.where(WLK_TRN_KNR_DTIM[1] < 0, 0, WLK_TRN_KNR_DTIM[1])
WLK_TRN_KNR_DTIM[2] =  np.where(WLK_TRN_KNR_DTIM[2] < 0, 0, WLK_TRN_KNR_DTIM[2])
WLK_TRN_KNR_DTIM[3] =  np.where(WLK_TRN_KNR_DTIM[3] < 0, 0, WLK_TRN_KNR_DTIM[3])
WLK_TRN_KNR_DTIM[4] =  np.where(WLK_TRN_KNR_DTIM[4] < 0, 0, WLK_TRN_KNR_DTIM[4])
WLK_TRN_KNR_DTIM[5] =  np.where(WLK_TRN_KNR_DTIM[5] < 0, 0, WLK_TRN_KNR_DTIM[5])

In [9]:
# randomly check few matrix cores
#PNR_TRN_WLK_DDIST[4].sum()
#PNR_TRN_WLK_DDIST[2].sum()

In [9]:
# ct ramp has params.properties which has certain parameter values used in the utility equations. 
# Following function extracts these values.


def extract_property_values(file_path, variables):
    property_values = {}
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith('#'):
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()
                if key in variables:
                    property_values[key] = value
    return property_values

In [10]:
%%time
for purp in purpose:
    #print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice_Updated.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    # Removing NAs
    uec_purp_params_prop = uec_purp.loc[~uec_purp['Token'].isna()]
    # extract the parameters that have % in in their names, clean up-remove % and replace . with _
    uec_purp_params_prop = uec_purp_params_prop.loc[(uec_purp_params_prop['Formula for variable'].str.contains('%')==True)]
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace('%', '') 
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace(".", "_")
    # read parameters file
    file_path = _join(ctramp_dir, 'input', 'params.properties')
    # extract list of parameters
    prop_variables = list(uec_purp_params_prop['Formula for variable'])
    prop_variables_tokens = list(uec_purp_params_prop['Token'])
    prop_variables = [x.replace('_', '.') for x in prop_variables]

    values = extract_property_values(file_path, prop_variables)
    
    # Create a dictionary to store the extracted values
    extracted_values = {}

    # Assign the extracted values to the dictionary
    for variable, value in values.items():
        extracted_values[variable] = value

    # Print the values from the extracted_values dictionary 
    for variable, value in extracted_values.items():
        #print(f'{variable}: {value}')
        exec(f'{variable.replace(".", "_")} = {value}')
    
    
    # Assign the values to tokens
    # example costInitialTaxi = %taxi.baseFare%
    for _, row in uec_purp_params_prop.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
            #print(f"Variable '{variable_name}' is defined.")
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue

    
    uec_purp_params = uec_purp.loc[~uec_purp['Formula for variable'].isna()]
    uec_purp_params = uec_purp_params.loc[~uec_purp_params['Token'].isna()]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('if')==True)]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('%')==True)]

    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].astype(str)
    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].str.replace('@', '')

    key_column = 'Token'
    value_column = 'Formula for variable'

    # Create dictionary from selected columns
    data_dict = {}

    for _, row in uec_purp_params.iterrows():
        key = row[key_column]
        value = row[value_column]

        # Handle values that are strings
        if isinstance(value, str):
            try:
                value = int(value)
                data_dict[key] = value
            except ValueError:
                try:
                    value = float(value)
                    data_dict[key] = value
                except ValueError:
                    pass

    #get all the parameters
    variables = data_dict

    for _, row in uec_purp_params.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue
    
    #break
    
    int_zone = 3332
    da_util = np.empty((5, int_zone, int_zone))
    sr2_util = np.empty((5, int_zone, int_zone))
    sr3_util = np.empty((5, int_zone, int_zone))
    wlk_util =  np.empty((5, int_zone, int_zone))
    bike_util = np.empty((5, int_zone, int_zone))
    wlk_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_pnr_util = np.empty((5, int_zone, int_zone))
    pnr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_knr_util = np.empty((5, int_zone, int_zone))
    knr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    taxi_util = np.empty((5, int_zone, int_zone))
    
    for tripPeriod in time_period:
        #for trip_mode in oth_modes:
        #uec_purp_mode = uec_purp_df.loc[uec_purp_df['Description'].str.contains(trip_mode)==True]
        #uec_purp_mode['Formula for variable'] = uec_purp_mode['Formula for variable'].str.replace('tripPeriod', str(period))
        #uec_purp_mode['formula_calculation'] = 
        util = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'),'w')

        #Drive alone
        util['DA'] = c_ivt*SOV_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'DA', " ", np.array(util['DA']).min(), np.array(util['DA']).max())

        #Shared ride 2
        util['SR2'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR2', " ", np.array(util['SR2']).min(), np.array(util['SR2']).max())

        #Shared ride 3
        util['SR3'] = c_ivt*HOV3_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR3', " ", np.array(util['SR3']).min(), np.array(util['SR3']).max())

        # Walk 
        util['WALK'] = (walk_dist<=1)* (c_walkTimeShort * np.minimum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) + \
                       (walk_dist>1)* (c_walkTimeLong * np.maximum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) 
        print(tripPeriod, purp, 'WALK', " ", np.array(util['WALK']).min(), np.array(util['WALK']).max())
        
        #Bike
        util['BIKE'] = (bike_dist<=6)*(c_bikeTimeShort* np.minimum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed)) + \
                       (bike_dist>6)*(c_bikeTimeLong* np.maximum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed))
        print(tripPeriod, purp, 'BIKE', " ", np.array(util['BIKE']).min(), np.array(util['BIKE']).max())
        
        
        #Walk transit Walk
        util['WLK_TRN_WLK'] =  c_ivt*WLK_TRN_WLK_IVT_LOC[tripPeriod]/100 + \
                            c_ivt_exp*WLK_TRN_WLK_IVT_EXP[tripPeriod]/100 + \
                            c_ivt_lrt*WLK_TRN_WLK_IVT_LRT[tripPeriod]/100 + \
                            c_ivt_ferry*WLK_TRN_WLK_IVT_FRY[tripPeriod]/100 + \
                            c_ivt_hvy*WLK_TRN_WLK_IVT_HVY[tripPeriod]/100 + \
                            c_ivt_com*WLK_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_wlk * np.maximum(WLK_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_WLK_WAUX[tripPeriod]/100
        
        print(tripPeriod, purp, 'WLK_TRN_WLK', " ", np.array(util['WLK_TRN_WLK']).min(), np.array(util['WLK_TRN_WLK']).max())
        
        # Walk Transit PNR - Inbound
        util['WLK_TRN_PNR'] =  c_ivt*WLK_TRN_PNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_PNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_PNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_PNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_PNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_PNR_XWAIT[tripPeriod]/100 + \
                            c_dtim*WLK_TRN_PNR_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_PNR_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_PNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_PNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_PNR', " ", np.array(util['WLK_TRN_PNR']).min(), np.array(util['WLK_TRN_PNR']).max())

        # PNR transit Walk - Outbound
        util['PNR_TRN_WLK'] =  c_ivt*PNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*PNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*PNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(PNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(PNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*PNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_dtim*PNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_waux*PNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(PNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'PNR_TRN_WLK', " ", np.array(util['PNR_TRN_WLK']).min(), np.array(util['PNR_TRN_WLK']).max())

        # Walk Transit KNR - Inbound
        util['WLK_TRN_KNR'] = c_ivt*WLK_TRN_KNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_KNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_KNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_KNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_KNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_KNR_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_KNR_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*WLK_TRN_KNR_DTIM[tripPeriod]/100 + \
                            c_waux*WLK_TRN_KNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_KNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_KNR', " ", np.array(util['WLK_TRN_KNR']).min(), np.array(util['WLK_TRN_KNR']).max())

        # KNR Transit Walk - Outbound
        util['KNR_TRN_WLK'] = c_ivt*KNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*KNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*KNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(KNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(KNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*KNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(KNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*KNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_waux*KNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(KNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'KNR_TRN_WLK', " ", np.array(util['KNR_TRN_WLK']).min(), np.array(util['KNR_TRN_WLK']).max())

        # taxi
        util['RIDEHAIL'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]  + c_ivt*1.5*taxi_wait_time
        print(tripPeriod, purp, 'RIDEHAIL', " ", np.array(util['RIDEHAIL']).min(), np.array(util['RIDEHAIL']).max())
        
    
        util.close()

1 Work DA   0.11402832 1000000.0
1 Work SR2   0.11402832 1000000.0
1 Work SR3   0.11402832 1000000.0
1 Work WALK   0.0 873.8145954581506
1 Work BIKE   0.0 2169.2814269893684
1 Work WLK_TRN_WLK   0.0 2462.003
1 Work WLK_TRN_PNR   0.0 1195.7017
1 Work PNR_TRN_WLK   0.0 1760.9365
1 Work WLK_TRN_KNR   0.0 1191.6678
1 Work KNR_TRN_WLK   0.0 1735.9254
1 Work RIDEHAIL   4.6140283197164536 1000015.0
2 Work DA   0.11456831 1000000.0
2 Work SR2   0.11456831 1000000.0
2 Work SR3   0.11456831 1000000.0
2 Work WALK   0.0 873.8145954581506
2 Work BIKE   0.0 2169.2814269893684
2 Work WLK_TRN_WLK   0.0 1557.0641
2 Work WLK_TRN_PNR   0.0 1344.3217
2 Work PNR_TRN_WLK   0.0 840.4518
2 Work WLK_TRN_KNR   0.0 1333.2927
2 Work KNR_TRN_WLK   0.0 699.4796
2 Work RIDEHAIL   4.614568307995796 1000015.0
3 Work DA   0.11527477 1000000.0
3 Work SR2   0.11527477 1000000.0
3 Work SR3   0.11527477 1000000.0
3 Work WALK   0.0 873.8145954581506
3 Work BIKE   0.0 2169.2814269893684
3 Work WLK_TRN_WLK   0.0 1991.739
3 Wo

5 Escort WLK_TRN_WLK   0.0 2088.0557
5 Escort WLK_TRN_PNR   0.0 1499.6587
5 Escort PNR_TRN_WLK   0.0 1351.9441
5 Escort WLK_TRN_KNR   0.0 1343.6875
5 Escort KNR_TRN_WLK   0.0 1215.2727
5 Escort RIDEHAIL   4.6140900403261185 1000015.0
1 Shopping DA   0.11402832 1000000.0
1 Shopping SR2   0.11402832 1000000.0
1 Shopping SR3   0.11402832 1000000.0
1 Shopping WALK   0.0 873.8145954581506
1 Shopping BIKE   0.0 2169.2814269893684
1 Shopping WLK_TRN_WLK   0.0 2429.751
1 Shopping WLK_TRN_PNR   0.0 1195.7017
1 Shopping PNR_TRN_WLK   0.0 1760.9365
1 Shopping WLK_TRN_KNR   0.0 1191.6678
1 Shopping KNR_TRN_WLK   0.0 1735.9254
1 Shopping RIDEHAIL   4.6140283197164536 1000015.0
2 Shopping DA   0.11456831 1000000.0
2 Shopping SR2   0.11456831 1000000.0
2 Shopping SR3   0.11456831 1000000.0
2 Shopping WALK   0.0 873.8145954581506
2 Shopping BIKE   0.0 2169.2814269893684
2 Shopping WLK_TRN_WLK   0.0 1533.3799
2 Shopping WLK_TRN_PNR   0.0 1344.3217
2 Shopping PNR_TRN_WLK   0.0 840.4518
2 Shopping WLK_TR

4 Social PNR_TRN_WLK   0.0 835.58826
4 Social WLK_TRN_KNR   0.0 1289.5059
4 Social KNR_TRN_WLK   0.0 825.34314
4 Social RIDEHAIL   4.6144483387470245 1000015.0
5 Social DA   0.11409004 1000000.0
5 Social SR2   0.11409004 1000000.0
5 Social SR3   0.11409004 1000000.0
5 Social WALK   0.0 873.8145954581506
5 Social BIKE   0.0 2169.2814269893684
5 Social WLK_TRN_WLK   0.0 2088.0557
5 Social WLK_TRN_PNR   0.0 1499.6587
5 Social PNR_TRN_WLK   0.0 1351.9441
5 Social WLK_TRN_KNR   0.0 1343.6875
5 Social KNR_TRN_WLK   0.0 1215.2727
5 Social RIDEHAIL   4.6140900403261185 1000015.0
1 OthDiscr DA   0.11402832 1000000.0
1 OthDiscr SR2   0.11402832 1000000.0
1 OthDiscr SR3   0.11402832 1000000.0
1 OthDiscr WALK   0.0 873.8145954581506
1 OthDiscr BIKE   0.0 2169.2814269893684
1 OthDiscr WLK_TRN_WLK   0.0 2429.751
1 OthDiscr WLK_TRN_PNR   0.0 1195.7017
1 OthDiscr PNR_TRN_WLK   0.0 1760.9365
1 OthDiscr WLK_TRN_KNR   0.0 1191.6678
1 OthDiscr KNR_TRN_WLK   0.0 1735.9254
1 OthDiscr RIDEHAIL   4.6140283197

In [11]:
# add mappings from time period and purpose
df_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
len(df_trips)

35981987

In [12]:
# inbound trips get orig purpose, outbound trips get dest purpose
df_trips['util_purpose'] = np.where(df_trips['inbound']==1, df_trips['orig_purpose'], df_trips['dest_purpose'])

In [13]:
#df_trips['util_purpose'].value_counts()

In [14]:
purp_dict = { 'work' : 'Work', 
              'shopping' : 'Shopping',
              'escort' : 'Escort', 
              'othdiscr': 'OthDiscr',
              'othmaint': 'OthMaint',
              'school' : 'School', 
              'eatout' : 'EatOut', 
              'atwork' : 'WorkBased', 
              'social' : 'Social',
              'university' : 'University'}

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

In [15]:
df_trips['util_purpose'] = df_trips['util_purpose'].map(purp_dict)

In [16]:
%%time

import pandas as pd
import itertools

num_zones = 3332

for tripPeriod, value in time_period.items():

    for purp in purpose:
        print(f'Analyzing purpose: {purp} and time period: {value}')
        #df_temp = df_trips.query(f"util_purpose == {purp} and Period == {value.lower()}")
        #df_temp = df_trips.query(f"util_purpose == '{purp}' and Period == '{value.lower}'")
        df_temp = df_trips.loc[(df_trips['util_purpose'] == purp) & (df_trips['Period'] == value.lower())]
        
        # Generate all combinations of orig and dest
        combinations = list(itertools.product(range(1, num_zones + 1), repeat=2))

        # Create the DataFrame with orig and dest columns
        purp_df = pd.DataFrame(combinations, columns=['orig', 'dest'])

        # read utility files
        util_file = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'))

        for core in util_file.list_matrices():
            print(f'extracting {core} core form utility file')
            mode_core = np.array(util_file[core])
            mode_core = np.where(mode_core == 0, -999, mode_core)
            skm_df = pd.DataFrame(mode_core)
            skm_df = pd.melt(skm_df.reset_index(), id_vars='index', value_vars=skm_df.columns)
            skm_df['index'] = skm_df['index'] + 1
            skm_df['variable'] = skm_df['variable'] + 1
            skm_df.columns = ['orig', 'dest', core]
            purp_df = pd.merge(purp_df, skm_df, on=['orig', 'dest'], how='left')
        
        df_temp = pd.merge(df_temp, purp_df, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        
        print(f'writing the trip file for purpose : {purp} and time period: {value}', df_temp.shape)
        df_temp.to_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        
        #break

Analyzing purpose: Work and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Work and time period: EA (305666, 40)
Analyzing purpose: University and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility f

Analyzing purpose: Shopping and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Shopping and time period: AM (740724, 40)
Analyzing purpose: EatOut and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utili

Analyzing purpose: OthDiscr and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : OthDiscr and time period: MD (1071502, 40)
Analyzing purpose: WorkBased and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form u

extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : School and time period: EV (141579, 40)
Analyzing purpose: Escort and time period: EV
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility f

In [6]:
# combine all trips into one file

# read trip files
final_trips = []

for tripPeriod, value in time_period.items():
    for purp in purpose:
        temp = pd.read_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        final_trips.append(temp)
        
final_trips = pd.concat(final_trips)
len(final_trips) 

35981987

In [7]:
final_trips[1:10]

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,...,DA,KNR_TRN_WLK,PNR_TRN_WLK,RIDEHAIL,SR2,SR3,WALK,WLK_TRN_KNR,WLK_TRN_PNR,WLK_TRN_WLK
1,2158112,5013525.0,1,work,Home,900,1,5,1,1.0,...,35.226,107.509,98.192,47.226,35.226,34.681,-999.0,-999.0,-999.0,145.335
2,2158150,5013627.0,1,work,Home,646,1,5,8,1.0,...,16.113,47.697,71.723,20.613,16.113,16.16,-999.0,30.474,31.542,68.807
3,2219080,5173734.0,1,work,Home,274,2,5,4,1.0,...,1.698,16.936,77.149,6.198,1.698,1.698,22.762,24.022,58.198,-999.0
4,2219453,5174821.0,1,work,Home,645,2,5,2,1.0,...,12.764,33.858,78.153,17.264,12.764,12.764,-999.0,23.001,36.819,68.065
5,2219454,5174825.0,1,work,Home,2,2,5,4,1.0,...,0.289,-999.0,-999.0,4.789,0.289,0.289,3.214,-999.0,-999.0,-999.0
6,2218978,5173418.0,1,work,Home,610,2,5,6,1.0,...,10.486,29.517,83.77,14.986,10.486,10.486,-999.0,25.555,30.868,55.784
7,2218657,5172455.0,1,work,Home,472,2,5,1,1.0,...,3.643,15.936,79.991,12.643,3.643,3.643,33.933,16.047,56.847,-999.0
8,1777746,4318702.0,1,work,Home,612,3,5,2,1.0,...,5.899,25.403,61.413,10.399,5.899,5.899,365.091,14.096,42.379,24.892
9,1775075,4314268.0,1,work,Home,10,3,5,2,1.0,...,6.851,28.158,61.207,11.351,6.851,6.851,398.344,26.188,42.379,25.575


In [8]:
final_trips.columns

Index(['hh_id', 'person_id', 'inbound', 'orig_purpose', 'dest_purpose',
       'orig_taz', 'dest_taz', 'depart_hour', 'trip_mode', 'sampleRate',
       'trip_type', 'trips', 'transbay_od', 'orig_rdm_zones',
       'orig_super_dist', 'orig_county', 'dest_rdm_zones', 'dest_super_dist',
       'dest_county', 'home_zone', 'income', 'Income', 'pp_share',
       'link21_trip_purp', 'Period', 'Mode', 'util_purpose', 'orig', 'dest',
       'BIKE', 'DA', 'KNR_TRN_WLK', 'PNR_TRN_WLK', 'RIDEHAIL', 'SR2', 'SR3',
       'WALK', 'WLK_TRN_KNR', 'WLK_TRN_PNR', 'WLK_TRN_WLK'],
      dtype='object')

In [33]:
#### Add distance
md_dist = omx.open_file(_join(skims_dir, 'HWYSKMmd.omx'))
md_dist = skim_core_to_df(md_dist, 'DISTDAM', cols =['orig', 'dest', 'dist'])

final_trips = pd.merge(final_trips, md_dist, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')

def calculate_weighted_average_by_category(df):
    weighted_avgs = df.groupby('Period').apply(
        lambda group: (group['trips'] * group['dist']).sum() / group['trips'].sum()
    )
    
    weighted_avgs = weighted_avgs.reset_index()
    weighted_avgs.columns = ['Period', 'dist']
    weighted_avgs_all = pd.DataFrame(columns=['Period', 'dist'])
    weighted_avgs_all.loc[0] = ['All', (df['dist'] * df['trips']).sum() / df['trips'].sum()]
 
    weighted_avgs_all = pd.concat([weighted_avgs_all, weighted_avgs], ignore_index=True)
    
    return weighted_avgs_all

In [47]:
da = final_trips.loc[final_trips['trip_mode'] == 1]
sr2 = final_trips.loc[final_trips['trip_mode'] == 2]
sr3 = final_trips.loc[final_trips['trip_mode'] == 3]
walk = final_trips.loc[final_trips['trip_mode'] == 4]
bike = final_trips.loc[final_trips['trip_mode'] == 5]
rh = final_trips.loc[final_trips['trip_mode'] == 9]

#transit
wtw = final_trips.loc[final_trips['trip_mode'] == 6]

ptw = final_trips.loc[(final_trips['trip_mode'] == 7) & (final_trips['inbound'] == 0)]
wtp = final_trips.loc[(final_trips['trip_mode'] == 7) & (final_trips['inbound'] == 1)]

ktw = final_trips.loc[(final_trips['trip_mode'] == 8) & (final_trips['inbound'] == 0)]
wtk = final_trips.loc[(final_trips['trip_mode'] == 8) & (final_trips['inbound'] == 1)]

In [48]:
da_tt = summarize_all_combinations(da, groupby_columns=['Period'], summary_column='DA')
da_tt = da_tt.rename(columns={'Value': 'DA'})

sr2_tt = summarize_all_combinations(sr2, groupby_columns=['Period'], summary_column='SR2')
sr2_tt = sr2_tt.rename(columns={'Value': 'SR2'})

sr3_tt = summarize_all_combinations(sr3, groupby_columns=['Period'], summary_column='SR3')
sr3_tt = sr3_tt.rename(columns={'Value': 'SR3'})

walk_tt = summarize_all_combinations(walk, groupby_columns=['Period'], summary_column='WALK')
walk_tt = walk_tt.rename(columns={'Value': 'WALK'})

bike_tt = summarize_all_combinations(bike, groupby_columns=['Period'], summary_column='BIKE')
bike_tt = bike_tt.rename(columns={'Value': 'BIKE'})

wtw_tt = summarize_all_combinations(wtw, groupby_columns=['Period'], summary_column='WLK_TRN_WLK')
wtw_tt = wtw_tt.rename(columns={'Value': 'WLK_TRN_WLK'})

ptw_tt = summarize_all_combinations(ptw, groupby_columns=['Period'], summary_column='PNR_TRN_WLK')
ptw_tt = ptw_tt.rename(columns={'Value': 'PNR_TRN_WLK'})

wtp_tt = summarize_all_combinations(wtp, groupby_columns=['Period'], summary_column='WLK_TRN_PNR')
wtp_tt = wtp_tt.rename(columns={'Value': 'WLK_TRN_PNR'})

ktw_tt = summarize_all_combinations(ktw, groupby_columns=['Period'], summary_column='KNR_TRN_WLK')
ktw_tt = ktw_tt.rename(columns={'Value': 'KNR_TRN_WLK'})

wtk_tt = summarize_all_combinations(wtk, groupby_columns=['Period'], summary_column='WLK_TRN_KNR')
wtk_tt = wtk_tt.rename(columns={'Value': 'WLK_TRN_KNR'})

rh_tt = summarize_all_combinations(rh, groupby_columns=['Period'], summary_column='RIDEHAIL')
rh_tt = rh_tt.rename(columns={'Value': 'RIDEHAIL'})

In [49]:
all_modes=pd.merge(da_tt, sr2_tt, on='Period').merge(sr3_tt, on='Period').merge(
    walk_tt, on='Period').merge(
    bike_tt, on='Period').merge(
    wtw_tt, on='Period').merge(
    ptw_tt, on='Period').merge(
    wtp_tt, on='Period').merge(
    ktw_tt, on='Period').merge(
    wtk_tt, on='Period').merge(
    rh_tt, on='Period')

In [50]:
# trips
da_trip = summarize_all_combinations(da, groupby_columns=['Period'], summary_column='trips')
da_trip = da_trip.rename(columns={'Value': 'DA'})

sr2_trip = summarize_all_combinations(sr2, groupby_columns=['Period'], summary_column='trips')
sr2_trip = sr2_trip.rename(columns={'Value': 'SR2'})

sr3_trip = summarize_all_combinations(sr3, groupby_columns=['Period'], summary_column='trips')
sr3_trip = sr3_trip.rename(columns={'Value': 'SR3'})

walk_trip = summarize_all_combinations(walk, groupby_columns=['Period'], summary_column='trips')
walk_trip = walk_trip.rename(columns={'Value': 'WALK'})

bike_trip = summarize_all_combinations(bike, groupby_columns=['Period'], summary_column='trips')
bike_trip = bike_trip.rename(columns={'Value': 'BIKE'})

wtw_trip = summarize_all_combinations(wtw, groupby_columns=['Period'], summary_column='trips')
wtw_trip = wtw_trip.rename(columns={'Value': 'WLK_TRN_WLK'})

ptw_trip = summarize_all_combinations(ptw, groupby_columns=['Period'], summary_column='trips')
ptw_trip = ptw_trip.rename(columns={'Value': 'PNR_TRN_WLK'})

wtp_trip = summarize_all_combinations(wtp, groupby_columns=['Period'], summary_column='trips')
wtp_trip = wtp_trip.rename(columns={'Value': 'WLK_TRN_PNR'})

ktw_trip = summarize_all_combinations(ktw, groupby_columns=['Period'], summary_column='trips')
ktw_trip = ktw_trip.rename(columns={'Value': 'KNR_TRN_WLK'})

wtk_trip = summarize_all_combinations(wtk, groupby_columns=['Period'], summary_column='trips')
wtk_trip = wtk_trip.rename(columns={'Value': 'WLK_TRN_KNR'})

rh_trip = summarize_all_combinations(rh, groupby_columns=['Period'], summary_column='trips')
rh_trip = rh_trip.rename(columns={'Value': 'RIDEHAIL'})

In [51]:
all_modes_trips =pd.merge(da_trip, sr2_trip, on='Period').merge(sr3_trip, on='Period').merge(
    walk_trip, on='Period').merge(
    bike_trip, on='Period').merge(
    wtw_trip, on='Period').merge(
    ptw_trip, on='Period').merge(
    wtp_trip, on='Period').merge(
    ktw_trip, on='Period').merge(
    wtk_trip, on='Period').merge(
    rh_trip, on='Period')

In [52]:
da_dist

Unnamed: 0,Period,DA
0,All,6.606
1,am,7.908
2,ea,9.889
3,ev,6.974
4,md,5.34
5,pm,6.427


In [53]:
# distance
da_dist = calculate_weighted_average_by_category(da)
da_dist = da_dist.rename(columns={'dist': 'DA'})

sr2_dist = calculate_weighted_average_by_category(sr2)
sr2_dist = sr2_dist.rename(columns={'dist': 'SR2'})

sr3_dist = calculate_weighted_average_by_category(sr3)
sr3_dist = sr3_dist.rename(columns={'dist': 'SR3'})

walk_dist = calculate_weighted_average_by_category(walk)
walk_dist = walk_dist.rename(columns={'dist': 'WALK'})

bike_dist = calculate_weighted_average_by_category(bike)
bike_dist= bike_dist.rename(columns={'dist': 'BIKE'})

wtw_dist = calculate_weighted_average_by_category(wtw)
wtw_dist = wtw_dist.rename(columns={'dist': 'WLK_TRN_WLK'})

ptw_dist = calculate_weighted_average_by_category(ptw)
ptw_dist = ptw_dist.rename(columns={'dist': 'PNR_TRN_WLK'})

wtp_dist = calculate_weighted_average_by_category(wtp)
wtp_dist = wtp_dist.rename(columns={'dist': 'WLK_TRN_PNR'})

ktw_dist = calculate_weighted_average_by_category(ktw)
ktw_dist = ktw_dist.rename(columns={'dist': 'KNR_TRN_WLK'})

wtk_dist = calculate_weighted_average_by_category(wtk)
wtk_dist = wtk_dist.rename(columns={'dist': 'WLK_TRN_KNR'})

rh_dist = calculate_weighted_average_by_category(rh)
rh_dist = rh_dist.rename(columns={'dist': 'WLK_TRN_KNR'})

In [54]:
all_modes_dist =pd.merge(da_dist, sr2_dist, on='Period').merge(sr3_dist, on='Period').merge(
    walk_dist, on='Period').merge(
    bike_dist, on='Period').merge(
    wtw_dist, on='Period').merge(
    ptw_dist, on='Period').merge(
    wtp_dist, on='Period').merge(
    ktw_dist, on='Period').merge(
    wtk_dist, on='Period').merge(
    rh_dist, on='Period')

In [55]:
all_modes.to_csv("perceived_tt_"+concept_id+".csv")
all_modes_trips.to_csv("trips_tt_"+concept_id+".csv")
all_modes_dist.to_csv("trips_dist_tt_"+concept_id+".csv")

## Creating Summaries

In [None]:
# Regional Value

def create_summaries(final_trips, summary_col, filename_verbose, metric_num, filename_extension, mode_numbers):
    
    temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
    # Region
    df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
    df_region_period = df_region_period[['Period', 'Value']]

    df_region_period['Concept_ID'] = concept_id
    df_region_period['Metric_ID'] = metric_num
    df_region_period['Metric_name'] = 'Travel time savings'
    df_region_period['Submetric'] = metric_num + '.1'
    df_region_period['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_region_period['Population'] = 'Whole Population'
    df_region_period['Geography'] = 'Region'
    df_region_period['Zone_ID'] = ''
    df_region_period['Income'] = ''
    df_region_period['Mode'] = ''
    df_region_period['Purpose'] = ''
    df_region_period['Origin_zone'] = ''
    df_region_period['Dest_zone'] = ''
    df_region_period['Units'] = 'minutes'
    df_region_period['Total_Increment'] = ''
    
    # County
    df_cnty = summarize_all_combinations(temp, groupby_columns=['orig_county', 'dest_county', 'Period'], 
                                           summary_column=summary_col)

    df_cnty = df_cnty.rename(columns={ 
                                      'orig_county' : 'Origin_zone',
                                      'dest_county' : 'Dest_zone'})
    df_cnty = df_cnty[['Origin_zone', 'Dest_zone',  'Period', 'Value']]

    df_cnty['Concept_ID'] = concept_id
    df_cnty['Metric_ID'] = metric_num
    df_cnty['Metric_name'] = 'Travel time savings'
    df_cnty['Submetric'] =  metric_num + '.2'
    df_cnty['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination county'
    df_cnty['Population'] = 'Whole Population'
    df_cnty['Geography'] = 'County'
    df_cnty['Zone_ID'] = ''
    df_cnty['Income'] = ''
    df_cnty['Mode'] = ''
    df_cnty['Purpose'] = ''
    df_cnty['Units'] = 'minutes'
    df_cnty['Total_Increment'] = ''
    
    # RDM
    df_rdm = summarize_all_combinations(temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period'], 
                                           summary_column=summary_col)

    df_rdm = df_rdm.rename(columns={ 
                                    'orig_rdm_zones' : 'Origin_zone',
                                    'dest_rdm_zones' : 'Dest_zone'})

    df_rdm = df_rdm[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_rdm['Concept_ID'] = concept_id
    df_rdm['Metric_ID'] = metric_num
    df_rdm['Metric_name'] = 'Travel time savings'
    df_rdm['Submetric'] =  metric_num + '.3'
    df_rdm['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination RDM zone'
    df_rdm['Population'] = 'Whole Population'
    df_rdm['Geography'] = 'RDM'
    df_rdm['Zone_ID'] = ''
    df_rdm['Income'] = ''
    df_rdm['Mode'] = ''
    df_rdm['Purpose'] = ''
    df_rdm['Units'] = 'minutes'
    df_rdm['Total_Increment'] = ''
    
    # Super Districts
    df_sd = summarize_all_combinations(temp, groupby_columns=['orig_super_dist', 'dest_super_dist', 'Period'], 
                                           summary_column=summary_col)

    df_sd = df_sd.rename(columns={ 
                                  'orig_super_dist' : 'Origin_zone',
                                  'dest_super_dist' : 'Dest_zone'})
    df_sd = df_sd[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_sd['Concept_ID'] = concept_id
    df_sd['Metric_ID'] = metric_num
    df_sd['Metric_name'] = 'Travel time savings'
    df_sd['Submetric'] =  metric_num + '.4'
    df_sd['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination super district'
    df_sd['Population'] = 'Whole Population'
    df_sd['Geography'] = 'Super district'
    df_sd['Zone_ID'] = ''
    df_sd['Income'] = ''
    df_sd['Mode'] = ''
    df_sd['Purpose'] = ''
    df_sd['Units'] = 'trips'
    df_sd['Total_Increment'] = ''
    
    # Prioirty Population
    temp['pp_wtd_benefit'] = temp[summary_col] * temp['pp_share']/100
    df_pp = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column='pp_wtd_benefit')
    df_pp = df_pp[['Period', 'Value']]

    df_pp['Concept_ID'] = concept_id
    df_pp['Metric_ID'] = metric_num
    df_pp['Metric_name'] = 'Travel time savings'
    df_pp['Submetric'] =  metric_num + '.5'
    df_pp['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_pp['Population'] = 'Prioirty population'
    df_pp['Geography'] = 'Region'
    df_pp['Zone_ID'] = ''
    df_pp['Origin_zone'] = ''
    df_pp['Dest_zone'] = ''
    df_pp['Income'] = ''
    df_pp['Mode'] = ''
    df_pp['Purpose'] = ''
    df_pp['Units'] = 'trips'
    df_pp['Total_Increment'] = ''
    
    all_dfs = [df_region_period, df_cnty, df_rdm, df_sd, df_pp]

    for dfs in all_dfs:
        metric_name = filename_verbose #'_travel_time_auto_savings_'
        dfs = dfs.reset_index(drop=True)
        dfs = dfs[perf_measure_columns]
        file_name = dfs['Submetric'][0]
        geography = '_' + dfs['Geography'][0].replace(' ', '_')
        dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
        print(len(dfs), file_name, dfs['Metric_name'][0])

    combined_df = pd.concat([df_region_period, df_cnty, df_rdm, df_sd, df_pp]).reset_index(drop=True)
    combined_df.to_csv(_join(summary_dir,  metric_num + filename_verbose + concept_id + '_region' +filename_extension + '.csv'), index=None)

In [None]:
#create_summaries(final_trips,'ls_benefit_transit', '_travel_time_savings_transit_', 'E1.1', filename_extension, mode_numbers=[6,7,8])

In [None]:
#create_summaries(final_trips,'ls_benefit_auto', '_travel_time_savings_auto_', 'E1.2', filename_extension,  mode_numbers=[1,2,3])

In [None]:
#create_summaries(final_trips, 'ls_benefit_raidehail', '_travel_time_savings_ridehail_', 'E1.3', filename_extension, mode_numbers=[9])

In [None]:
#create_summaries(final_trips, 'ls_benefit_nm', '_travel_time_savings_non-motorized', 'E1.4', filename_extension, mode_numbers=[4,5])

## Effective Density Calculations

In [None]:
common_dir = params['common_dir']
decay_param_goods = 1.8
decay_param_services = 1.9

In [None]:
emp_data = pd.read_csv(_join(common_dir, f'EmpBreakdown{model_year}.csv'))

In [None]:
#emp_data['sector'] =  emp_data['link21'].map(emp_dict)
emp_data = emp_data.groupby(['TAZ'])['jobs'].sum().reset_index()
#emp_data = pd.pivot(emp_data, index='TAZ', columns='sector' , values='jobs').reset_index()

In [None]:
od_logsums = final_trips.groupby(['orig_taz', 'dest_taz'])['allmode_ls'].mean().reset_index()

In [None]:
od_logsums = pd.merge(od_logsums, emp_data, left_on='dest_taz', right_on='TAZ', how='left')
od_logsums['jobs'] = od_logsums['jobs'].fillna(0)

In [None]:
od_logsums['alpha_goods'] = decay_param_goods
od_logsums['alpha_services'] = decay_param_services

od_logsums['effective_density_good'] = od_logsums['jobs'] / (od_logsums['alpha_goods'] * od_logsums['allmode_ls'])
od_logsums['effective_density_services'] = od_logsums['jobs'] / (od_logsums['alpha_services'] * od_logsums['allmode_ls'])

In [None]:
od_logsums_orig = od_logsums.groupby(['orig_taz'])['effective_density_good', 'effective_density_services'].sum().reset_index()

In [None]:
sector_column = ['effective_density_good', 'effective_density_services']
sector_column = 'effective_density_good'

In [None]:
od_logsums_orig

In [None]:
def effective_density_summary(concept_id, od_logsums_orig, metric_num, sector_column, verbose, filename_extension, filename_verbose):
    df_region_ed = od_logsums_orig[['orig_taz', sector_column]]
    df_region_ed.columns = ['Origin_zone', 'Value']

    df_region_ed['Concept_ID'] = concept_id
    df_region_ed['Metric_ID'] = metric_num
    df_region_ed['Metric_name'] = 'Effective density'
    df_region_ed['Submetric'] = metric_num 
    df_region_ed['Description'] = 'Effective density for ' + verbose + ' in origin zone'
    df_region_ed['Population'] = 'Whole Population'
    df_region_ed['Geography'] = 'Region'
    df_region_ed['Zone_ID'] = ''
    df_region_ed['Income'] = ''
    df_region_ed['Mode'] = ''
    df_region_ed['Purpose'] = ''
    #df_region_ed['Origin_zone'] = '
    df_region_ed['Dest_zone'] = ''
    df_region_ed['Period'] = ''
    df_region_ed['Units'] = ''
    df_region_ed['Total_Increment'] = ''
    
    df_region_ed = df_region_ed[perf_measure_columns]
    df_region_ed.to_csv(_join(summary_dir,  metric_num + filename_verbose + \
                              concept_id + '_region' +filename_extension + '.csv'), index=None)

In [None]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.1', 
                          'effective_density_good', 'goods producing industries', 
                          filename_extension, '_effective_density_goods_producing_industries_')

In [None]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.2', 
                          'effective_density_services', 'services producing industries', 
                          filename_extension, '_effective_density_services_producing_industries_')