In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.options.display.float_format = '{:,.3f}'.format

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
ctramp_dir = params['ctramp_dir']
model_outputs_dir = params['model_dir']
summary_dir = params['summary_dir']
concept_id = params['concept_id']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
model_year = params['model_year']
filename_extension = params['filename_extension']
hwy_skims_dir = _join(model_outputs_dir, r'skims\highway' )

skims_dir = _join(model_outputs_dir, r'skims')

In [3]:
Path(summary_dir).mkdir(parents=True, exist_ok=True)
Path(preprocess_dir).mkdir(parents=True, exist_ok=True)

In [4]:
purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

### Calculate the taxi wait time for each origin zone

In [5]:
taz = pd.read_csv(_join(ctramp_dir, 'landuse', 'tazData_' + str(model_year) + '.csv'))
taz['popEmpSqMile'] = (taz['TOTPOP'] + taz['TOTEMP']) / (taz['TOTACRE'] * 0.0015625)

In [6]:
%%time
taz = taz[['ZONE', 'popEmpSqMile']]

# TNC 
#TNC_single_waitTime_mean =  10.3,8.5,8.4,6.3,3.0
#TNC_single_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#TNC_shared_waitTime_mean =  15.0,15.0,11.0,8.0,5.0
#TNC_shared_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#Taxi_waitTime_mean = 26.5,17.3,13.3,9.5,5.5
#Taxi_waitTime_sd =    6.4,6.4,6.4,6.4,6.4

#WaitTimeDistribution_EndPopEmpPerSqMi = 500,2000,5000,15000,9999999999

#TO DO: Ask John which wait time to use
taz['density_group'] = pd.cut(taz['popEmpSqMile'], bins= [-1, 500,2000,5000,15000,9999999999], 
                              labels=[10.3,8.5,8.4,6.3,3.0], ordered=False)
#taz['density_group'] = taz['density_group'].fillna(0)
taz['density_group'] =taz['density_group'].astype("int64")

taz = taz.sort_values('ZONE')
taxi_wait_time = np.repeat(taz['density_group'].values, len(taz)).reshape(len(taz), len(taz))

Wall time: 60 ms


### Load all the data from Skims 

In [7]:
%%time
# The data tab of the UEC file lists all the matrix cores and location an matrix files of skims
# 1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

# extract the file names, matrix cores 
matrix_df = pd.read_excel(_join(params['common_dir'], r"TripModeChoice.xlsx"), sheet_name='data')
matrix_df = matrix_df.iloc[9:]
matrix_df.columns = ['no', 'token', 'format', 'file','matrix', 'group', 'index']
#matrix_df[1:5]

# pre-processing
matrix_df['matrix_files'] = matrix_df['file'].str.replace('skims/', '')
matrix_df['path'] = 'skims'
matrix_df.loc[matrix_df['matrix_files'].str.contains('nonmot')==True, 'path'] = 'active'
matrix_df.loc[matrix_df['matrix_files'].str.contains('trnskm')==True, 'path'] = 'transit'
matrix_df.loc[matrix_df['matrix_files'].str.contains('hwyskm')==True, 'path'] = 'highway'
#matrix_df[1:5]

# Iterate over the DataFrame rows
for _, row in matrix_df.iterrows():
    variable_name = row['token']
    file_path = row['path']
    filename = row['matrix_files']
    matrix_cr = row['matrix']
    
    # Extract the variable name and index (if present)
    if '[' in variable_name:
        name_start = variable_name.index('[')
        name_end = variable_name.index(']')
        index = int(variable_name[name_start+1:name_end])
        variable_name = variable_name[:name_start]
    else:
        index=None
    
    # Read the file using numpy.load() and assign it to the variable with the specified index
    file = omx.open_file(_join(skims_dir, file_path, filename))
    file_contents = np.array(file[matrix_cr])
    print(variable_name,index, _join(skims_dir, file_path, filename), file_contents.sum(), file_contents.min(), file_contents.max())
    if '[' in row['token']:
        if variable_name in locals() and isinstance(locals()[variable_name], np.ndarray):
            arr = locals()[variable_name]
            if index >= len(arr):
                # Resize the array if the index is out of bounds
                new_arr = np.resize(arr, index + 1)
                new_arr[index] = file_contents
                locals()[variable_name] = new_arr
            else:
                arr[index] = file_contents
        else:
            arr = np.empty(index + 1, dtype=object)
            arr[index] = file_contents
            locals()[variable_name] = arr
    else:
        locals()[variable_name] = file_contents

DISTWALK None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\active\nonmotskm.omx 10796411595951.248 0.03142102696417673 1000000.0
DISTBIKE None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\active\nonmotskm.omx 8910510616075.47 0.018911417199612486 1000000.0
SOV_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13965190000.0 0.11402832 1000000.0
SOV_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13847476000.0 0.035375483 1000000.0
SOV_BTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 800814500.0 0.0 472.0
SOV_VTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 740758100.0 0.0 334.74524
HOV2_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13957872000.0 0.11402832 1000000.0
HOV2_DIST 1 C:\MTC_tmpy\TM

WLK_TRN_WLK_WAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 15679600000.0 0.0 13121.753
WLK_TRN_WLK_TOTIVT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 72267555000.0 0.0 35736.957
WLK_TRN_WLK_CROWD 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 460263070.0 0.0 1651.4585
WLK_TRN_WLK_IVT_LOC 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 14543465000.0 0.0 21880.174
WLK_TRN_WLK_IVT_EXP 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 13815339000.0 0.0 27026.084
WLK_TRN_WLK_IVT_LRT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 2572853500.0 0.0 8251.378
WLK_TRN_WLK_IVT_FRY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\

WLK_TRN_WLK_IVT_HVY 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 29163426000.0 0.0 13942.499
WLK_TRN_WLK_IVT_COM 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 11309249000.0 0.0 17370.922
WLK_TRN_WLK_FAR 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 5075058000.0 0.0 1606.8688
WLK_TRN_WLK_WAUX 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 13474856000.0 0.0 58780.285
WLK_TRN_WLK_IWAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 8426276400.0 0.0 15144.0
WLK_TRN_WLK_XWAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_WLK.omx 8161564000.0 0.0 15144.0
WLK_TRN_WLK_BOARDS 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\t

PNR_TRN_WLK_WAUX 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 7862337500.0 0.0 13677.844
PNR_TRN_WLK_IWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 843347100.0 0.0 2235.8616
PNR_TRN_WLK_XWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 13064792000.0 0.0 14019.572
PNR_TRN_WLK_BOARDS 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 19836748.0 0.0 6.568447
PNR_TRN_WLK_WACC 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 0.0 0.0 0.0
PNR_TRN_WLK_WEGR 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_PNR_TRN_WLK.omx 9225430000.0 0.0 1498.9985
PNR_TRN_WLK_WAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_PNR_TRN_WLK.om

PNR_TRN_WLK_XWAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_PNR_TRN_WLK.omx 13615256000.0 0.0 16050.0
PNR_TRN_WLK_BOARDS 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_PNR_TRN_WLK.omx 17664186.0 0.0 5.7317076
PNR_TRN_WLK_WACC 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_PNR_TRN_WLK.omx 0.0 0.0 0.0
PNR_TRN_WLK_WEGR 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_PNR_TRN_WLK.omx 9936300000.0 0.0 1498.9985
KNR_TRN_WLK_WAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_KNR_TRN_WLK.omx 10207549000.0 0.0 7400.0
KNR_TRN_WLK_TOTIVT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_KNR_TRN_WLK.omx 47418420000.0 0.0 30660.08
KNR_TRN_WLK_CROWD 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_KNR_TRN_WLK.om

KNR_TRN_WLK_BOARDS 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_KNR_TRN_WLK.omx 15870158.0 0.0 5.8448277
KNR_TRN_WLK_WACC 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_KNR_TRN_WLK.omx 0.0 0.0 0.0
KNR_TRN_WLK_WEGR 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_KNR_TRN_WLK.omx 8660829000.0 0.0 1498.9985
KNR_TRN_WLK_WAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_KNR_TRN_WLK.omx 10489494000.0 0.0 13148.934
KNR_TRN_WLK_TOTIVT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_KNR_TRN_WLK.omx 56485910000.0 0.0 34532.05
KNR_TRN_WLK_CROWD 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_KNR_TRN_WLK.omx 2987133000.0 0.0 3701.033
KNR_TRN_WLK_IVT_LOC 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_KNR_TRN_W

WLK_TRN_PNR_WACC 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_PNR.omx 7464166400.0 0.0 1499.0826
WLK_TRN_PNR_WEGR 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_PNR.omx 0.0 0.0 0.0
WLK_TRN_PNR_WAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_WLK_TRN_PNR.omx 15218888000.0 0.0 18630.826
WLK_TRN_PNR_TOTIVT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_WLK_TRN_PNR.omx 68985545000.0 0.0 37286.445
WLK_TRN_PNR_CROWD 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_WLK_TRN_PNR.omx 5226412500.0 0.0 4728.2446
WLK_TRN_PNR_IVT_LOC 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_WLK_TRN_PNR.omx 14282911000.0 0.0 16783.023
WLK_TRN_PNR_IVT_EXP 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmAM_WLK

WLK_TRN_PNR_WEGR 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmPM_WLK_TRN_PNR.omx 0.0 0.0 0.0
WLK_TRN_PNR_WAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WLK_TRN_PNR.omx 19190446000.0 0.0 18309.602
WLK_TRN_PNR_TOTIVT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WLK_TRN_PNR.omx 54061126000.0 0.0 33111.746
WLK_TRN_PNR_CROWD 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WLK_TRN_PNR.omx 864440300.0 0.0 2591.637
WLK_TRN_PNR_IVT_LOC 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WLK_TRN_PNR.omx 10404224000.0 0.0 10824.764
WLK_TRN_PNR_IVT_EXP 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WLK_TRN_PNR.omx 5161590000.0 0.0 14251.668
WLK_TRN_PNR_IVT_LRT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEV_WL

WLK_TRN_KNR_WAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 12630669000.0 0.0 25863.635
WLK_TRN_KNR_TOTIVT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 48155197000.0 0.0 34178.496
WLK_TRN_KNR_CROWD 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 293681860.0 0.0 731.44824
WLK_TRN_KNR_IVT_LOC 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 9957813000.0 0.0 13932.647
WLK_TRN_KNR_IVT_EXP 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 1866441700.0 0.0 15191.908
WLK_TRN_KNR_IVT_LRT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmMD_WLK_TRN_KNR.omx 1806487400.0 0.0 6171.666
WLK_TRN_KNR_IVT_FRY 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\tr

In [8]:
# change th 1000000.0 values in DISTWALK and DISTBIKE to 0
DISTWALK = np.where(DISTWALK == 1000000.0, 0, DISTWALK)
DISTBIKE = np.where(DISTBIKE == 1000000.0, 0, DISTBIKE)

PNR_TRN_WLK_DTIM[1] =  np.where(PNR_TRN_WLK_DTIM[1] < 0, 0, PNR_TRN_WLK_DTIM[1])
PNR_TRN_WLK_DTIM[2] =  np.where(PNR_TRN_WLK_DTIM[2] < 0, 0, PNR_TRN_WLK_DTIM[2])
PNR_TRN_WLK_DTIM[3] =  np.where(PNR_TRN_WLK_DTIM[3] < 0, 0, PNR_TRN_WLK_DTIM[3])
PNR_TRN_WLK_DTIM[4] =  np.where(PNR_TRN_WLK_DTIM[4] < 0, 0, PNR_TRN_WLK_DTIM[4])
PNR_TRN_WLK_DTIM[5] =  np.where(PNR_TRN_WLK_DTIM[5] < 0, 0, PNR_TRN_WLK_DTIM[5])

KNR_TRN_WLK_DTIM[1] =  np.where(KNR_TRN_WLK_DTIM[1] < 0, 0, KNR_TRN_WLK_DTIM[1])
KNR_TRN_WLK_DTIM[2] =  np.where(KNR_TRN_WLK_DTIM[2] < 0, 0, KNR_TRN_WLK_DTIM[2])
KNR_TRN_WLK_DTIM[3] =  np.where(KNR_TRN_WLK_DTIM[3] < 0, 0, KNR_TRN_WLK_DTIM[3])
KNR_TRN_WLK_DTIM[4] =  np.where(KNR_TRN_WLK_DTIM[4] < 0, 0, KNR_TRN_WLK_DTIM[4])
KNR_TRN_WLK_DTIM[5] =  np.where(KNR_TRN_WLK_DTIM[5] < 0, 0, KNR_TRN_WLK_DTIM[5])

WLK_TRN_PNR_DTIM[1] =  np.where(WLK_TRN_PNR_DTIM[1] < 0, 0, WLK_TRN_PNR_DTIM[1])
WLK_TRN_PNR_DTIM[2] =  np.where(WLK_TRN_PNR_DTIM[2] < 0, 0, WLK_TRN_PNR_DTIM[2])
WLK_TRN_PNR_DTIM[3] =  np.where(WLK_TRN_PNR_DTIM[3] < 0, 0, WLK_TRN_PNR_DTIM[3])
WLK_TRN_PNR_DTIM[4] =  np.where(WLK_TRN_PNR_DTIM[4] < 0, 0, WLK_TRN_PNR_DTIM[4])
WLK_TRN_PNR_DTIM[5] =  np.where(WLK_TRN_PNR_DTIM[5] < 0, 0, WLK_TRN_PNR_DTIM[5])

WLK_TRN_KNR_DTIM[1] =  np.where(WLK_TRN_KNR_DTIM[1] < 0, 0, WLK_TRN_KNR_DTIM[1])
WLK_TRN_KNR_DTIM[2] =  np.where(WLK_TRN_KNR_DTIM[2] < 0, 0, WLK_TRN_KNR_DTIM[2])
WLK_TRN_KNR_DTIM[3] =  np.where(WLK_TRN_KNR_DTIM[3] < 0, 0, WLK_TRN_KNR_DTIM[3])
WLK_TRN_KNR_DTIM[4] =  np.where(WLK_TRN_KNR_DTIM[4] < 0, 0, WLK_TRN_KNR_DTIM[4])
WLK_TRN_KNR_DTIM[5] =  np.where(WLK_TRN_KNR_DTIM[5] < 0, 0, WLK_TRN_KNR_DTIM[5])

In [None]:
# randomly check few matrix cores
#PNR_TRN_WLK_DDIST[4].sum()
#PNR_TRN_WLK_DDIST[2].sum()

In [9]:
# use the PM peak crowding and iwait variables, transpose them, and use them for the AM.
PNR_TRN_WLK_CROWD[2] = PNR_TRN_WLK_CROWD[4].T
PNR_TRN_WLK_IWAIT[2] = PNR_TRN_WLK_IWAIT[4].T

In [10]:
# ct ramp has params.properties which has certain parameter values used in the utility equations. 
# Following function extracts these values.


def extract_property_values(file_path, variables):
    property_values = {}
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith('#'):
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()
                if key in variables:
                    property_values[key] = value
    return property_values

In [11]:
%%time
for purp in purpose:
    #print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    # Removing NAs
    uec_purp_params_prop = uec_purp.loc[~uec_purp['Token'].isna()]
    # extract the parameters that have % in in their names, clean up-remove % and replace . with _
    uec_purp_params_prop = uec_purp_params_prop.loc[(uec_purp_params_prop['Formula for variable'].str.contains('%')==True)]
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace('%', '') 
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace(".", "_")
    # read parameters file
    file_path = _join(ctramp_dir, 'input', 'params.properties')
    # extract list of parameters
    prop_variables = list(uec_purp_params_prop['Formula for variable'])
    prop_variables_tokens = list(uec_purp_params_prop['Token'])
    prop_variables = [x.replace('_', '.') for x in prop_variables]

    values = extract_property_values(file_path, prop_variables)
    
    # Create a dictionary to store the extracted values
    extracted_values = {}

    # Assign the extracted values to the dictionary
    for variable, value in values.items():
        extracted_values[variable] = value

    # Print the values from the extracted_values dictionary 
    for variable, value in extracted_values.items():
        #print(f'{variable}: {value}')
        exec(f'{variable.replace(".", "_")} = {value}')
    
    
    # Assign the values to tokens
    # example costInitialTaxi = %taxi.baseFare%
    for _, row in uec_purp_params_prop.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
            #print(f"Variable '{variable_name}' is defined.")
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue

    
    uec_purp_params = uec_purp.loc[~uec_purp['Formula for variable'].isna()]
    uec_purp_params = uec_purp_params.loc[~uec_purp_params['Token'].isna()]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('if')==True)]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('%')==True)]

    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].astype(str)
    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].str.replace('@', '')

    key_column = 'Token'
    value_column = 'Formula for variable'

    # Create dictionary from selected columns
    data_dict = {}

    for _, row in uec_purp_params.iterrows():
        key = row[key_column]
        value = row[value_column]

        # Handle values that are strings
        if isinstance(value, str):
            try:
                value = int(value)
                data_dict[key] = value
            except ValueError:
                try:
                    value = float(value)
                    data_dict[key] = value
                except ValueError:
                    pass

    #get all the parameters
    variables = data_dict

    for _, row in uec_purp_params.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue
    
    #break
    
    int_zone = 3332
    da_util = np.empty((5, int_zone, int_zone))
    sr2_util = np.empty((5, int_zone, int_zone))
    sr3_util = np.empty((5, int_zone, int_zone))
    wlk_util =  np.empty((5, int_zone, int_zone))
    bike_util = np.empty((5, int_zone, int_zone))
    wlk_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_pnr_util = np.empty((5, int_zone, int_zone))
    pnr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_knr_util = np.empty((5, int_zone, int_zone))
    knr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    taxi_util = np.empty((5, int_zone, int_zone))
    
    for tripPeriod in time_period:
        #for trip_mode in oth_modes:
        #uec_purp_mode = uec_purp_df.loc[uec_purp_df['Description'].str.contains(trip_mode)==True]
        #uec_purp_mode['Formula for variable'] = uec_purp_mode['Formula for variable'].str.replace('tripPeriod', str(period))
        #uec_purp_mode['formula_calculation'] = 
        util = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'),'w')

        #Drive alone
        util['DA'] = c_ivt*SOV_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'DA', " ", np.array(util['DA']).min(), np.array(util['DA']).max())

        #Shared ride 2
        util['SR2'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR2', " ", np.array(util['SR2']).min(), np.array(util['SR2']).max())

        #Shared ride 3
        util['SR3'] = c_ivt*HOV3_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR3', " ", np.array(util['SR3']).min(), np.array(util['SR3']).max())

        # Walk 
        util['WALK'] = (walk_dist<=1)* (c_walkTimeShort * np.minimum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) + \
                       (walk_dist>1)* (c_walkTimeLong * np.maximum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) 
        print(tripPeriod, purp, 'WALK', " ", np.array(util['WALK']).min(), np.array(util['WALK']).max())
        
        #Bike
        util['BIKE'] = (bike_dist<=6)*(c_bikeTimeShort* np.minimum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed)) + \
                       (bike_dist>6)*(c_bikeTimeLong* np.maximum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed))
        print(tripPeriod, purp, 'BIKE', " ", np.array(util['BIKE']).min(), np.array(util['BIKE']).max())
        
        
        #Walk transit Walk
        util['WLK_TRN_WLK'] =  c_ivt*WLK_TRN_WLK_IVT_LOC[tripPeriod]/100 + \
                            c_ivt_exp*WLK_TRN_WLK_IVT_EXP[tripPeriod]/100 + \
                            c_ivt_lrt*WLK_TRN_WLK_IVT_LRT[tripPeriod]/100 + \
                            c_ivt_ferry*WLK_TRN_WLK_IVT_FRY[tripPeriod]/100 + \
                            c_ivt_hvy*WLK_TRN_WLK_IVT_HVY[tripPeriod]/100 + \
                            c_ivt_com*WLK_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_wlk * np.maximum(WLK_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_wacc*WLK_TRN_WLK_WACC[tripPeriod]/100 + \
                            c_wegr*WLK_TRN_WLK_WEGR[tripPeriod]/100
        
        print(tripPeriod, purp, 'WLK_TRN_WLK', " ", np.array(util['WLK_TRN_WLK']).min(), np.array(util['WLK_TRN_WLK']).max())
        
        # Walk Transit PNR - Inbound
        util['WLK_TRN_PNR'] =  c_ivt*WLK_TRN_PNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_PNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_PNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_PNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_PNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_PNR_XWAIT[tripPeriod]/100 + \
                            c_dtim*WLK_TRN_PNR_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_PNR_BOARDS[tripPeriod]-1,0) + \
                            c_wacc*WLK_TRN_PNR_WACC[tripPeriod]/100 + \
                            c_waux*WLK_TRN_PNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_PNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_PNR', " ", np.array(util['WLK_TRN_PNR']).min(), np.array(util['WLK_TRN_PNR']).max())

        # PNR transit Walk - Outbound
        util['PNR_TRN_WLK'] =  c_ivt*PNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*PNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*PNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(PNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(PNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*PNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_dtim*PNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(PNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_wegr*PNR_TRN_WLK_WEGR[tripPeriod]/100 + \
                            c_waux*PNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(PNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'PNR_TRN_WLK', " ", np.array(util['PNR_TRN_WLK']).min(), np.array(util['PNR_TRN_WLK']).max())

        # Walk Transit KNR - Inbound
        util['WLK_TRN_KNR'] = c_ivt*WLK_TRN_KNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_KNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_KNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_KNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_KNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_KNR_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_KNR_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*WLK_TRN_KNR_DTIM[tripPeriod]/100 + \
                            c_wacc*WLK_TRN_KNR_WACC[tripPeriod]/100 + \
                            c_waux*WLK_TRN_KNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_KNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_KNR', " ", np.array(util['WLK_TRN_KNR']).min(), np.array(util['WLK_TRN_KNR']).max())

        # KNR Transit Walk - Outbound
        util['KNR_TRN_WLK'] = c_ivt*KNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*KNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*KNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(KNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(KNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*KNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(KNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*KNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_wegr*WLK_TRN_KNR_WEGR[tripPeriod]/100 + \
                            c_waux*KNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(KNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'KNR_TRN_WLK', " ", np.array(util['KNR_TRN_WLK']).min(), np.array(util['KNR_TRN_WLK']).max())

        # taxi
        util['RIDEHAIL'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]  + c_ivt*1.5*taxi_wait_time
        print(tripPeriod, purp, 'RIDEHAIL', " ", np.array(util['RIDEHAIL']).min(), np.array(util['RIDEHAIL']).max())
        
    
        util.close()

1 Work DA   -22000.0 -0.002508623
1 Work SR2   -22000.0 -0.002508623
1 Work SR3   -22000.0 -0.002508623
1 Work WALK   -19.223921100079313 -0.0
1 Work BIKE   -47.724191393766105 -0.0
1 Work WLK_TRN_WLK   -55.023533 -0.0
1 Work WLK_TRN_PNR   -26.461655 0.0
1 Work PNR_TRN_WLK   -39.51164 0.0
1 Work WLK_TRN_KNR   -26.372913 0.0
1 Work KNR_TRN_WLK   -38.19036 0.0
1 Work RIDEHAIL   -22000.33 -0.10150862305983901
2 Work DA   -22000.0 -0.0025205028
2 Work SR2   -22000.0 -0.0025205028
2 Work SR3   -22000.0 -0.0025205028
2 Work WALK   -19.223921100079313 -0.0
2 Work BIKE   -47.724191393766105 -0.0
2 Work WLK_TRN_WLK   -34.802177 -0.0
2 Work WLK_TRN_PNR   -29.731298 0.0
2 Work PNR_TRN_WLK   -18.75381 0.0
2 Work WLK_TRN_KNR   -29.488659 0.0
2 Work KNR_TRN_WLK   -15.388551 0.0
2 Work RIDEHAIL   -22000.33 -0.10152050277777017
3 Work DA   -22000.0 -0.002536045
3 Work SR2   -22000.0 -0.002536045
3 Work SR3   -22000.0 -0.002536045
3 Work WALK   -19.223921100079313 -0.0
3 Work BIKE   -47.724191393766105

4 Escort WLK_TRN_KNR   -36.17533 0.0
4 Escort KNR_TRN_WLK   -23.027073 0.0
4 Escort RIDEHAIL   -27900.4185 -0.12874310859777033
5 Escort DA   -27900.0 -0.003183112
5 Escort SR2   -27900.0 -0.003183112
5 Escort SR3   -27900.0 -0.003183112
5 Escort WALK   -24.379427213282405 -0.0
5 Escort BIKE   -60.52295181300339 -0.0
5 Escort WLK_TRN_WLK   -59.385555 -0.0
5 Escort WLK_TRN_PNR   -42.39295 0.0
5 Escort PNR_TRN_WLK   -38.858482 0.0
5 Escort WLK_TRN_KNR   -38.04136 0.0
5 Escort KNR_TRN_WLK   -33.90611 0.0
5 Escort RIDEHAIL   -27900.4185 -0.12873311201408505
1 Shopping DA   -27900.0 -0.00318139
1 Shopping SR2   -27900.0 -0.00318139
1 Shopping SR3   -27900.0 -0.00318139
1 Shopping WALK   -24.379427213282405 -0.0
1 Shopping BIKE   -60.52295181300339 -0.0
1 Shopping WLK_TRN_WLK   -68.88002 -0.0
1 Shopping WLK_TRN_PNR   -33.55819 0.0
1 Shopping PNR_TRN_WLK   -50.10794 0.0
1 Shopping WLK_TRN_KNR   -33.445644 0.0
1 Shopping KNR_TRN_WLK   -48.432316 0.0
1 Shopping RIDEHAIL   -27900.4185 -0.1287313

3 Social SR3   -27900.0 -0.003216166
3 Social WALK   -24.379427213282405 -0.0
3 Social BIKE   -60.52295181300339 -0.0
3 Social WLK_TRN_WLK   -55.696358 -0.0
3 Social WLK_TRN_PNR   -35.367344 0.0
3 Social PNR_TRN_WLK   -35.35457 0.0
3 Social WLK_TRN_KNR   -34.851616 0.0
3 Social KNR_TRN_WLK   -31.192188 0.0
3 Social RIDEHAIL   -27900.4185 -0.12876616604924201
4 Social DA   -27900.0 -0.0031931086
4 Social SR2   -27900.0 -0.0031931086
4 Social SR3   -27900.0 -0.0031931086
4 Social WALK   -24.379427213282405 -0.0
4 Social BIKE   -60.52295181300339 -0.0
4 Social WLK_TRN_WLK   -39.311394 -0.0
4 Social WLK_TRN_PNR   -36.56835 0.0
4 Social PNR_TRN_WLK   -25.082716 0.0
4 Social WLK_TRN_KNR   -36.17533 0.0
4 Social KNR_TRN_WLK   -23.027073 0.0
4 Social RIDEHAIL   -27900.4185 -0.12874310859777033
5 Social DA   -27900.0 -0.003183112
5 Social SR2   -27900.0 -0.003183112
5 Social SR3   -27900.0 -0.003183112
5 Social WALK   -24.379427213282405 -0.0
5 Social BIKE   -60.52295181300339 -0.0
5 Social WLK

In [None]:
# util.close()

In [12]:
# add mappings from time period and purpose
df_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
len(df_trips)

35981987

In [13]:
# inbound trips get orig purpose, outbound trips get dest purpose
df_trips['util_purpose'] = np.where(df_trips['inbound']==1, df_trips['orig_purpose'], df_trips['dest_purpose'])

In [None]:
#df_trips['util_purpose'].value_counts()

In [14]:
purp_dict = { 'work' : 'Work', 
              'shopping' : 'Shopping',
              'escort' : 'Escort', 
              'othdiscr': 'OthDiscr',
              'othmaint': 'OthMaint',
              'school' : 'School', 
              'eatout' : 'EatOut', 
              'atwork' : 'WorkBased', 
              'social' : 'Social',
              'university' : 'University'}

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

In [15]:
df_trips['util_purpose'] = df_trips['util_purpose'].map(purp_dict)

In [16]:
%%time

import pandas as pd
import itertools

num_zones = 3332

for tripPeriod, value in time_period.items():

    for purp in purpose:
        print(f'Analyzing purpose: {purp} and time period: {value}')
        #df_temp = df_trips.query(f"util_purpose == {purp} and Period == {value.lower()}")
        #df_temp = df_trips.query(f"util_purpose == '{purp}' and Period == '{value.lower}'")
        df_temp = df_trips.loc[(df_trips['util_purpose'] == purp) & (df_trips['Period'] == value.lower())]
        
        # Generate all combinations of orig and dest
        combinations = list(itertools.product(range(1, num_zones + 1), repeat=2))

        # Create the DataFrame with orig and dest columns
        purp_df = pd.DataFrame(combinations, columns=['orig', 'dest'])

        # read utility files
        util_file = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'))

        for core in util_file.list_matrices():
            print(f'extracting {core} core form utility file')
            mode_core = np.array(util_file[core])
            mode_core = np.where(mode_core == 0, -999, mode_core)
            skm_df = pd.DataFrame(mode_core)
            skm_df = pd.melt(skm_df.reset_index(), id_vars='index', value_vars=skm_df.columns)
            skm_df['index'] = skm_df['index'] + 1
            skm_df['variable'] = skm_df['variable'] + 1
            skm_df.columns = ['orig', 'dest', core]
            purp_df = pd.merge(purp_df, skm_df, on=['orig', 'dest'], how='left')
        
        df_temp = pd.merge(df_temp, purp_df, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        
        print(f'writing the trip file for purpose : {purp} and time period: {value}', df_temp.shape)
        df_temp.to_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        
        #break

Analyzing purpose: Work and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Work and time period: EA (305666, 40)
Analyzing purpose: University and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility f

Analyzing purpose: Shopping and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Shopping and time period: AM (740724, 40)
Analyzing purpose: EatOut and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utili

Analyzing purpose: OthDiscr and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : OthDiscr and time period: MD (1071502, 40)
Analyzing purpose: WorkBased and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form u

extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : School and time period: EV (141579, 40)
Analyzing purpose: Escort and time period: EV
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility f

In [17]:
# combine all trips into one file

# read trip files
final_trips = []

for tripPeriod, value in time_period.items():
    for purp in purpose:
        temp = pd.read_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        final_trips.append(temp)
        
final_trips = pd.concat(final_trips)
len(final_trips) 

35981987

### Calculate the logsums

In [18]:
auto_nesting_coef = 0.72
trn_nesting_coef = 0.72
nm_nest_coef = 0.72
ridehail_nest_coef = 1

In [19]:
# Create logsum

final_trips['auto_ls'] = auto_nesting_coef * (np.log(np.exp(final_trips['DA']/auto_nesting_coef) + 
                                                     np.exp(final_trips['SR2']/auto_nesting_coef) + 
                                                     np.exp(final_trips['SR3']/auto_nesting_coef)))


final_trips['exp_trn'] = np.exp(final_trips['WLK_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_PNR']/trn_nesting_coef) + \
                               np.exp(final_trips['PNR_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_KNR']/trn_nesting_coef) + \
                               np.exp(final_trips['KNR_TRN_WLK']/trn_nesting_coef)

final_trips['trn_ls'] = np.where(final_trips['exp_trn'] > 0, trn_nesting_coef *(np.log(final_trips['exp_trn'])), 0)


final_trips['exp_nm'] = np.exp(final_trips['WALK']/nm_nest_coef) + \
                                  np.exp(final_trips['BIKE']/nm_nest_coef)
    
final_trips['non_mot_ls'] = np.where(final_trips['exp_nm'] > 0, nm_nest_coef * (np.log(final_trips['exp_nm'])), 0)

final_trips['exp_ridehail'] = np.exp(final_trips['RIDEHAIL']/ridehail_nest_coef)
final_trips['ridehail_ls'] = np.where(final_trips['exp_ridehail']>0, ridehail_nest_coef * (np.log(final_trips['exp_ridehail'])),0)

final_trips['allmode_ls'] = np.log(np.exp(final_trips['auto_ls']) + 
                                np.exp(final_trips['trn_ls']) + 
                                np.exp(final_trips['non_mot_ls']) + 
                                np.exp(final_trips['ridehail_ls']))

In [20]:
# Create logsum
final_trips['allmode_ls_adj'] = final_trips['allmode_ls'] - 10

In [21]:
final_trips['sum_ls'] = np.exp(final_trips['auto_ls']) + np.exp(final_trips['trn_ls']) +  np.exp(final_trips['non_mot_ls']) + np.exp(final_trips['ridehail_ls'])

In [11]:
#final_trips[final_trips['allmode_ls']<0][1:10]

### Get BETA IVT values for each purpose from UEC sheet

In [22]:
# get beta IVT for each purpose
ivt_purp = pd.DataFrame(columns=['util_purpose'])

for purp in purpose:
    print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    ivt = uec_purp.loc[uec_purp['Token']=='c_ivt', 'Formula for variable'].item()
    #ivt_lrt = uec_purp.loc[uec_purp['Token']=='c_ivt_lrt', 'Formula for variable'].item()
    #ivt_ferry = uec_purp.loc[uec_purp['Token']=='c_ivt_ferry', 'Formula for variable'].item()
    #ivt_exp = uec_purp.loc[uec_purp['Token']=='c_ivt_exp', 'Formula for variable'].item()
    #ivt_hvy = uec_purp.loc[uec_purp['Token']=='c_ivt_hvy', 'Formula for variable'].item()
    #ivt_com = uec_purp.loc[uec_purp['Token']=='c_ivt_com', 'Formula for variable'].item()
    
    ivt_purp = ivt_purp.append({'util_purpose': purp, 'b_ivt': ivt #'b_ivt_lrt': ivt_lrt,
                                #'b_ivt_ferry' : ivt_ferry, 'b_ivt_exp': ivt_exp, 
                                #'b_ivt_hvy': ivt_hvy, 'b_ivt_com': ivt_com
                               }, ignore_index=True)

Work
University
School
Escort
Shopping
EatOut
OthMaint
Social
OthDiscr
WorkBased


In [23]:
# merge with trip roster
final_trips = pd.merge(final_trips, ivt_purp, on = 'util_purpose', how='left')

In [24]:
# logsum benefits auto
final_trips['ls_benefit_auto'] =  (final_trips['allmode_ls_adj'] * \
                                    (np.exp(final_trips['auto_ls'])/final_trips['sum_ls']) * \
                                        (final_trips['trips']/final_trips['b_ivt'])) #

In [25]:
# logsum benefits transit
final_trips['ls_benefit_transit'] = (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['trn_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* 

In [26]:
# logsum benefits ridehail
final_trips['ls_benefit_raidehail'] = (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['ridehail_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* final_trips['trips']

In [27]:
# logsum benefits non-motorized
final_trips['ls_benefit_nm'] =  (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['non_mot_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* final_trips['trips']

In [18]:
# superdistrict average composite utility
#super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['allmode_ls'].mean().reset_index()
#super_dist.to_csv("super_dist_composite_utility_"+concept_id+".csv", index=False)

In [28]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['allmode_ls'].mean().reset_index()
super_dist.to_csv("super_dist_composite_utility_allmodels"+concept_id+".csv", index=False)

In [29]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trn_ls'].mean().reset_index()
super_dist.to_csv("super_dist_trnls"+concept_id+".csv", index=False)

In [30]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['sum_ls'].mean().reset_index()
super_dist.to_csv("super_dist_composite_utility_sumls"+concept_id+".csv", index=False)

In [31]:
# superdistrict average composite utility
super_dist_util = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['allmode_ls', 
                                                                                   'auto_ls', 
                                                                                   'trn_ls'].mean().reset_index()

super_dist_trips = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trips'].sum().reset_index()

super_dist = pd.merge(super_dist_util,super_dist_trips, on=['orig_super_dist', 'dest_super_dist', 'Period'], how='left')

super_dist.to_csv("super_dist_util_trips"+concept_id+".csv", index=False)

In [32]:
# superdistrict average composite utility
trn_ls = final_trips.groupby(['orig_taz', 'dest_taz', 'Period'])['trn_ls'].mean().reset_index()
trn_ls = trn_ls.pivot(index=['orig_taz', 'dest_taz'], columns='Period', values='trn_ls')
trn_ls.to_csv("taz_trnls"+concept_id+".csv.gz", compression='gzip')

In [33]:
# superdistrict transit trips
trn_trips = final_trips.loc[final_trips['trip_mode'].isin([6,7,8])]
trn_trips = trn_trips.groupby(['orig_super_dist', 'dest_super_dist'])['trips'].sum().reset_index()
trn_trips.to_csv("super_district_trn_trips_"+concept_id+".csv")

In [34]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trips'].sum().reset_index()
super_dist.to_csv("super_dist_trips_"+concept_id+".csv")

In [22]:
#final_trips = pd.read_parquet(_join(preprocess_dir, 'trips_ls.parquet'))

In [35]:
summary_col = 'ls_benefit_transit'
mode_numbers = [6,7,8]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
trn = df_region_period[['Period', 'Value']]
trn.columns = ['Period', 'trn']
#df_region_period.to_csv('trn_ls.csv')

summary_col = 'ls_benefit_auto'
mode_numbers = [1,2,3]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
auto = df_region_period[['Period', 'Value']]
auto.columns = ['Period', 'auto']
#df_region_period.to_csv('auto_ls.csv')

summary_col = 'ls_benefit_nm'
mode_numbers = [4,5]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
nm = df_region_period[['Period', 'Value']]
nm.columns = ['Period', 'nm']
#df_region_period.to_csv('nm_ls.csv')

summary_col = 'ls_benefit_raidehail'
mode_numbers = [9]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
rh = df_region_period[['Period', 'Value']]
rh.columns = ['Period', 'rh']
#df_region_period.to_csv('rh_ls.csv')

final = pd.merge(trn, auto, on = 'Period').merge(
                    nm, on='Period').merge(
                    rh, on='Period')

final.to_csv("ls_benefits"+concept_id+".csv")

In [36]:
summary_col = 'trips'
mode_numbers = [6,7,8]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
trn = df_region_period[['Period', 'Value']]
trn.columns = ['Period', 'trn']
#df_region_period.to_csv('trn_ls.csv')

summary_col = 'trips'
mode_numbers = [1,2,3]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
auto = df_region_period[['Period', 'Value']]
auto.columns = ['Period', 'auto']
#df_region_period.to_csv('auto_ls.csv')

summary_col = 'trips'
mode_numbers = [4,5]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
nm = df_region_period[['Period', 'Value']]
nm.columns = ['Period', 'nm']
#df_region_period.to_csv('nm_ls.csv')

summary_col = 'trips'
mode_numbers = [9]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
rh = df_region_period[['Period', 'Value']]
rh.columns = ['Period', 'rh']
#df_region_period.to_csv('rh_ls.csv')

final = pd.merge(trn, auto, on = 'Period').merge(
                    nm, on='Period').merge(
                    rh, on='Period')

final.to_csv("ls_benefits"+concept_id+"_trips.csv")

In [37]:
#### Add distance
md_dist = omx.open_file(_join(ctramp_dir, 'skims\HWYSKMmd.omx'))
md_dist = skim_core_to_df(md_dist, 'DISTDAM', cols =['orig', 'dest', 'dist'])

final_trips = pd.merge(final_trips, md_dist, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')

def calculate_weighted_average_by_category(df):
    weighted_avgs = df.groupby('Period').apply(
        lambda group: (group['trips'] * group['dist']).sum() / group['trips'].sum()
    )
    
    weighted_avgs = weighted_avgs.reset_index()
    weighted_avgs.columns = ['Period', 'dist']
    weighted_avgs_all = pd.DataFrame(columns=['Period', 'dist'])
    weighted_avgs_all.loc[0] = ['All', (df['dist'] * df['trips']).sum() / df['trips'].sum()]
 
    weighted_avgs_all = pd.concat([weighted_avgs_all, weighted_avgs], ignore_index=True)
    
    return weighted_avgs_all

In [38]:
summary_col = 'trips'
mode_numbers = [6,7,8]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = calculate_weighted_average_by_category(temp)
trn = df_region_period[['Period', 'dist']]
trn.columns = ['Period', 'trn']
#df_region_period.to_csv('trn_ls.csv')

summary_col = 'trips'
mode_numbers = [1,2,3]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = calculate_weighted_average_by_category(temp)
auto = df_region_period[['Period', 'dist']]
auto.columns = ['Period', 'auto']
#df_region_period.to_csv('auto_ls.csv')

summary_col = 'trips'
mode_numbers = [4,5]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = calculate_weighted_average_by_category(temp)
nm = df_region_period[['Period', 'dist']]
nm.columns = ['Period', 'nm']
#df_region_period.to_csv('nm_ls.csv')

summary_col = 'trips'
mode_numbers = [9]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = calculate_weighted_average_by_category(temp)
rh = df_region_period[['Period', 'dist']]
rh.columns = ['Period', 'rh']
#df_region_period.to_csv('rh_ls.csv')

final = pd.merge(trn, auto, on = 'Period').merge(
                    nm, on='Period').merge(
                    rh, on='Period')

final.to_csv("ls_benefits"+concept_id+"_trip_length.csv")

In [39]:
summary_col = 'allmode_ls_adj'
mode_numbers = [6,7,8]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
trn = df_region_period[['Period', 'Value']]
trn.columns = ['Period', 'trn']
#df_region_period.to_csv('trn_ls.csv')

summary_col = 'allmode_ls_adj'
mode_numbers = [1,2,3]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
auto = df_region_period[['Period', 'Value']]
auto.columns = ['Period', 'auto']
#df_region_period.to_csv('auto_ls.csv')

summary_col = 'allmode_ls_adj'
mode_numbers = [4,5]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
nm = df_region_period[['Period', 'Value']]
nm.columns = ['Period', 'nm']
#df_region_period.to_csv('nm_ls.csv')

summary_col = 'allmode_ls_adj'
mode_numbers = [9]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
rh = df_region_period[['Period', 'Value']]
rh.columns = ['Period', 'rh']
#df_region_period.to_csv('rh_ls.csv')

final = pd.merge(trn, auto, on = 'Period').merge(
                    nm, on='Period').merge(
                    rh, on='Period')

final.to_csv("ls_benefits_allmode"+concept_id+".csv")

In [40]:
import pandas as pd
from itertools import product

numbers_1_to_3332 = range(1, 3333)
combinations_1_to_5 = range(1, 6)
combinations_strings = ['am', 'md', 'pm', 'ev', 'ea']

combinations = list(product(numbers_1_to_3332, numbers_1_to_3332, combinations_strings))

df = pd.DataFrame(combinations, columns=['orig_taz', 'dest_taz', 'Period'])
df.head()

Unnamed: 0,orig_taz,dest_taz,Period
0,1,1,am
1,1,1,md
2,1,1,pm
3,1,1,ev
4,1,1,ea


In [42]:
auto = final_trips.loc[final_trips['trip_mode'].isin([1,2,3])]
trn = final_trips.loc[final_trips['trip_mode'].isin([6,7,8])]
nm = final_trips.loc[final_trips['trip_mode'].isin([4,5])]
rh = final_trips.loc[final_trips['trip_mode'].isin([9])]                      

In [45]:
%%time
auto_trip = auto.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
auto_trip = auto_trip.rename(columns={'trips': 'auto_trips'})

trn_trip = trn.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
trn_trip = trn_trip.rename(columns={'trips': 'trn_trips'})

nm_trip = nm.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
nm_trip = nm_trip.rename(columns={'trips': 'nm_trips'})

rh_trip = rh.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
rh_trip = rh_trip.rename(columns={'trips': 'rh_trips'})

Wall time: 10.1 s


In [51]:
%%time
all_modes_trips=pd.merge(df, auto_trip,  on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    trn_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    nm_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    rh_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left')

Wall time: 1min 16s


In [None]:
#mean of logsum benefits

In [56]:
%%time
auto_ls = auto.groupby(['orig_taz', 'dest_taz', 'Period'])['ls_benefit_auto'].sum().reset_index()
auto_ls = auto_ls.rename(columns={'ls_benefit_auto': 'auto_ls'})

trn_ls = trn.groupby(['orig_taz', 'dest_taz', 'Period'])['ls_benefit_transit'].sum().reset_index()
trn_ls = trn_ls.rename(columns={'ls_benefit_transit': 'trn_ls'})

nm_ls = nm.groupby(['orig_taz', 'dest_taz', 'Period'])['ls_benefit_nm'].sum().reset_index()
nm_ls = nm_ls.rename(columns={'ls_benefit_nm': 'nm_ls'})

rh_ls = rh.groupby(['orig_taz', 'dest_taz', 'Period'])['ls_benefit_raidehail'].sum().reset_index()
rh_ls = rh_ls.rename(columns={'ls_benefit_raidehail': 'rh_ls'})

Wall time: 10.4 s


In [59]:
%%time
all_modes_ls = pd.merge(df, auto_ls,  on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    trn_ls, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    nm_ls, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    rh_ls, on=['orig_taz', 'dest_taz', 'Period'], how='left')

Wall time: 1min 15s


In [60]:
all_modes = pd.merge(all_modes_trips, all_modes_ls, on=['orig_taz', 'dest_taz', 'Period'], how='left')

In [61]:
all_modes = all_modes.fillna(0)

In [62]:
all_modes.to_parquet(_join(concept_id+'_perc_ls_trips.parquet'))

## Creating Summaries

In [28]:
# Regional Value

def create_summaries(final_trips, summary_col, filename_verbose, metric_num, filename_extension, mode_numbers):
    
    temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
    # Region
    df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
    df_region_period = df_region_period[['Period', 'Value']]

    df_region_period['Concept_ID'] = concept_id
    df_region_period['Metric_ID'] = metric_num
    df_region_period['Metric_name'] = 'Travel time savings'
    df_region_period['Submetric'] = metric_num + '.1'
    df_region_period['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_region_period['Population'] = 'Whole Population'
    df_region_period['Geography'] = 'Region'
    df_region_period['Zone_ID'] = ''
    df_region_period['Income'] = ''
    df_region_period['Mode'] = ''
    df_region_period['Purpose'] = ''
    df_region_period['Origin_zone'] = ''
    df_region_period['Dest_zone'] = ''
    df_region_period['Units'] = 'minutes'
    df_region_period['Total_Increment'] = ''
    
    # County
    df_cnty = summarize_all_combinations(temp, groupby_columns=['orig_county', 'dest_county', 'Period'], 
                                           summary_column=summary_col)

    df_cnty = df_cnty.rename(columns={ 
                                      'orig_county' : 'Origin_zone',
                                      'dest_county' : 'Dest_zone'})
    df_cnty = df_cnty[['Origin_zone', 'Dest_zone',  'Period', 'Value']]

    df_cnty['Concept_ID'] = concept_id
    df_cnty['Metric_ID'] = metric_num
    df_cnty['Metric_name'] = 'Travel time savings'
    df_cnty['Submetric'] =  metric_num + '.2'
    df_cnty['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination county'
    df_cnty['Population'] = 'Whole Population'
    df_cnty['Geography'] = 'County'
    df_cnty['Zone_ID'] = ''
    df_cnty['Income'] = ''
    df_cnty['Mode'] = ''
    df_cnty['Purpose'] = ''
    df_cnty['Units'] = 'minutes'
    df_cnty['Total_Increment'] = ''
    
    # RDM
    df_rdm = summarize_all_combinations(temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period'], 
                                           summary_column=summary_col)

    df_rdm = df_rdm.rename(columns={ 
                                    'orig_rdm_zones' : 'Origin_zone',
                                    'dest_rdm_zones' : 'Dest_zone'})

    df_rdm = df_rdm[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_rdm['Concept_ID'] = concept_id
    df_rdm['Metric_ID'] = metric_num
    df_rdm['Metric_name'] = 'Travel time savings'
    df_rdm['Submetric'] =  metric_num + '.3'
    df_rdm['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination RDM zone'
    df_rdm['Population'] = 'Whole Population'
    df_rdm['Geography'] = 'RDM'
    df_rdm['Zone_ID'] = ''
    df_rdm['Income'] = ''
    df_rdm['Mode'] = ''
    df_rdm['Purpose'] = ''
    df_rdm['Units'] = 'minutes'
    df_rdm['Total_Increment'] = ''
    
    # Super Districts
    df_sd = summarize_all_combinations(temp, groupby_columns=['orig_super_dist', 'dest_super_dist', 'Period'], 
                                           summary_column=summary_col)

    df_sd = df_sd.rename(columns={ 
                                  'orig_super_dist' : 'Origin_zone',
                                  'dest_super_dist' : 'Dest_zone'})
    df_sd = df_sd[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_sd['Concept_ID'] = concept_id
    df_sd['Metric_ID'] = metric_num
    df_sd['Metric_name'] = 'Travel time savings'
    df_sd['Submetric'] =  metric_num + '.4'
    df_sd['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination super district'
    df_sd['Population'] = 'Whole Population'
    df_sd['Geography'] = 'Super district'
    df_sd['Zone_ID'] = ''
    df_sd['Income'] = ''
    df_sd['Mode'] = ''
    df_sd['Purpose'] = ''
    df_sd['Units'] = 'trips'
    df_sd['Total_Increment'] = ''
    
    # Prioirty Population
    temp['pp_wtd_benefit'] = temp[summary_col] * temp['pp_share']/100
    df_pp = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column='pp_wtd_benefit')
    df_pp = df_pp[['Period', 'Value']]

    df_pp['Concept_ID'] = concept_id
    df_pp['Metric_ID'] = metric_num
    df_pp['Metric_name'] = 'Travel time savings'
    df_pp['Submetric'] =  metric_num + '.5'
    df_pp['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_pp['Population'] = 'Prioirty population'
    df_pp['Geography'] = 'Region'
    df_pp['Zone_ID'] = ''
    df_pp['Origin_zone'] = ''
    df_pp['Dest_zone'] = ''
    df_pp['Income'] = ''
    df_pp['Mode'] = ''
    df_pp['Purpose'] = ''
    df_pp['Units'] = 'trips'
    df_pp['Total_Increment'] = ''
    
    all_dfs = [df_region_period, df_cnty, df_rdm, df_sd, df_pp]

    for dfs in all_dfs:
        metric_name = filename_verbose #'_travel_time_auto_savings_'
        dfs = dfs.reset_index(drop=True)
        dfs = dfs[perf_measure_columns]
        file_name = dfs['Submetric'][0]
        geography = '_' + dfs['Geography'][0].replace(' ', '_')
        dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
        print(len(dfs), file_name, dfs['Metric_name'][0])

    combined_df = pd.concat([df_region_period, df_cnty, df_rdm, df_sd, df_pp]).reset_index(drop=True)
    combined_df.to_csv(_join(summary_dir,  metric_num + filename_verbose + concept_id + '_region' +filename_extension + '.csv'), index=None)

In [29]:
#create_summaries(final_trips,'ls_benefit_transit', '_travel_time_savings_transit_', 'E1.1', filename_extension, mode_numbers=[6,7,8])

In [30]:
#create_summaries(final_trips,'ls_benefit_auto', '_travel_time_savings_auto_', 'E1.2', filename_extension,  mode_numbers=[1,2,3])

In [31]:
#create_summaries(final_trips, 'ls_benefit_raidehail', '_travel_time_savings_ridehail_', 'E1.3', filename_extension, mode_numbers=[9])

In [32]:
#create_summaries(final_trips, 'ls_benefit_nm', '_travel_time_savings_non-motorized', 'E1.4', filename_extension, mode_numbers=[4,5])

## Effective Density Calculations

In [33]:
common_dir = params['common_dir']
decay_param_goods = 1.8
decay_param_services = 1.9

In [34]:
emp_data = pd.read_csv(_join(common_dir, f'EmpBreakdown{model_year}.csv'))

In [35]:
#emp_data['sector'] =  emp_data['link21'].map(emp_dict)
emp_data = emp_data.groupby(['TAZ'])['jobs'].sum().reset_index()
#emp_data = pd.pivot(emp_data, index='TAZ', columns='sector' , values='jobs').reset_index()

In [36]:
od_logsums = final_trips.groupby(['orig_taz', 'dest_taz'])['allmode_ls'].mean().reset_index()

In [37]:
od_logsums = pd.merge(od_logsums, emp_data, left_on='dest_taz', right_on='TAZ', how='left')
od_logsums['jobs'] = od_logsums['jobs'].fillna(0)

In [38]:
od_logsums['alpha_goods'] = decay_param_goods
od_logsums['alpha_services'] = decay_param_services

od_logsums['effective_density_good'] = od_logsums['jobs'] / (od_logsums['alpha_goods'] * od_logsums['allmode_ls'])
od_logsums['effective_density_services'] = od_logsums['jobs'] / (od_logsums['alpha_services'] * od_logsums['allmode_ls'])

In [39]:
od_logsums_orig = od_logsums.groupby(['orig_taz'])['effective_density_good', 'effective_density_services'].sum().reset_index()

In [40]:
sector_column = ['effective_density_good', 'effective_density_services']
sector_column = 'effective_density_good'

In [41]:
od_logsums_orig

Unnamed: 0,orig_taz,effective_density_good,effective_density_services
0,1,1917237.186,1816329.966
1,2,2305372.202,2184036.823
2,3,3270978.007,3098821.269
3,4,1820216.755,1724415.873
4,5,2868946.411,2717949.231
...,...,...,...
3316,3328,2940197.225,2785450.003
3317,3329,851547.935,806729.623
3318,3330,2174829.758,2060365.034
3319,3331,1644407.358,1557859.602


In [42]:
def effective_density_summary(concept_id, od_logsums_orig, metric_num, sector_column, verbose, filename_extension, filename_verbose):
    df_region_ed = od_logsums_orig[['orig_taz', sector_column]]
    df_region_ed.columns = ['Origin_zone', 'Value']

    df_region_ed['Concept_ID'] = concept_id
    df_region_ed['Metric_ID'] = metric_num
    df_region_ed['Metric_name'] = 'Effective density'
    df_region_ed['Submetric'] = metric_num 
    df_region_ed['Description'] = 'Effective density for ' + verbose + ' in origin zone'
    df_region_ed['Population'] = 'Whole Population'
    df_region_ed['Geography'] = 'Region'
    df_region_ed['Zone_ID'] = ''
    df_region_ed['Income'] = ''
    df_region_ed['Mode'] = ''
    df_region_ed['Purpose'] = ''
    #df_region_ed['Origin_zone'] = '
    df_region_ed['Dest_zone'] = ''
    df_region_ed['Period'] = ''
    df_region_ed['Units'] = ''
    df_region_ed['Total_Increment'] = ''
    
    df_region_ed = df_region_ed[perf_measure_columns]
    df_region_ed.to_csv(_join(summary_dir,  metric_num + filename_verbose + \
                              concept_id + '_region' +filename_extension + '.csv'), index=None)

In [43]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.1', 
                          'effective_density_good', 'goods producing industries', 
                          filename_extension, '_effective_density_goods_producing_industries_')

In [44]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.2', 
                          'effective_density_services', 'services producing industries', 
                          filename_extension, '_effective_density_services_producing_industries_')