In [2]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

In [3]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
ctramp_dir = params['ctramp_dir']
model_outputs_dir = params['model_dir']
summary_dir = params['summary_dir']
concept_id = params['concept_id']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
model_year = params['model_year']
filename_extension = params['filename_extension']
hwy_skims_dir = _join(model_outputs_dir, r'skims\highway' )

In [4]:
Path(summary_dir).mkdir(parents=True, exist_ok=True)
Path(preprocess_dir).mkdir(parents=True, exist_ok=True)

In [5]:
purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

### Calculate the taxi wait time for each origin zone

In [5]:
taz = pd.read_csv(_join(ctramp_dir, 'landuse', 'tazData_' + str(model_year) + '.csv'))
taz['popEmpSqMile'] = (taz['TOTPOP'] + taz['TOTEMP']) / (taz['TOTACRE'] * 0.0015625)

In [6]:
%%time
taz = taz[['ZONE', 'popEmpSqMile']]

# TNC 
#TNC_single_waitTime_mean =  10.3,8.5,8.4,6.3,3.0
#TNC_single_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#TNC_shared_waitTime_mean =  15.0,15.0,11.0,8.0,5.0
#TNC_shared_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#Taxi_waitTime_mean = 26.5,17.3,13.3,9.5,5.5
#Taxi_waitTime_sd =    6.4,6.4,6.4,6.4,6.4

#WaitTimeDistribution_EndPopEmpPerSqMi = 500,2000,5000,15000,9999999999

#TO DO: Ask John which wait time to use
taz['density_group'] = pd.cut(taz['popEmpSqMile'], bins= [-1, 500,2000,5000,15000,9999999999], 
                              labels=[10.3,8.5,8.4,6.3,3.0], ordered=False)
#taz['density_group'] = taz['density_group'].fillna(0)
taz['density_group'] =taz['density_group'].astype("int64")

taz = taz.sort_values('ZONE')
taxi_wait_time = np.repeat(taz['density_group'].values, len(taz)).reshape(len(taz), len(taz))

Wall time: 60.1 ms


### Load all the data from Skims 

In [7]:
%%time
# The data tab of the UEC file lists all the matrix cores and location an matrix files of skims
# 1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

# extract the file names, matrix cores 
matrix_df = pd.read_excel(_join(params['common_dir'], r"TripModeChoice.xlsx"), sheet_name='data')
matrix_df = matrix_df.iloc[9:]
matrix_df.columns = ['no', 'token', 'format', 'file','matrix', 'group', 'index']
#matrix_df[1:5]

# pre-processing
matrix_df['matrix_files'] = matrix_df['file'].str.replace('skims/', '')
matrix_df['path'] = 'skims'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('nonmot')==True, 'path'] = 'active'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('trnskm')==True, 'path'] = 'transit'
#matrix_df.loc[matrix_df['matrix_files'].str.contains('hwyskm')==True, 'path'] = 'highway'
#matrix_df[1:5]

# Iterate over the DataFrame rows
for _, row in matrix_df.iterrows():
    variable_name = row['token']
    file_path = row['path']
    filename = row['matrix_files']
    matrix_cr = row['matrix']
    
    # Extract the variable name and index (if present)
    if '[' in variable_name:
        name_start = variable_name.index('[')
        name_end = variable_name.index(']')
        index = int(variable_name[name_start+1:name_end])
        variable_name = variable_name[:name_start]
    else:
        index=None
    
    # Read the file using numpy.load() and assign it to the variable with the specified index
    file = omx.open_file(_join(ctramp_dir, file_path, filename))
    file_contents = np.array(file[matrix_cr])
    print(variable_name,index, _join(ctramp_dir, file_path, filename), file_contents.sum(), file_contents.min(), file_contents.max())
    if '[' in row['token']:
        if variable_name in locals() and isinstance(locals()[variable_name], np.ndarray):
            arr = locals()[variable_name]
            if index >= len(arr):
                # Resize the array if the index is out of bounds
                new_arr = np.resize(arr, index + 1)
                new_arr[index] = file_contents
                locals()[variable_name] = new_arr
            else:
                arr[index] = file_contents
        else:
            arr = np.empty(index + 1, dtype=object)
            arr[index] = file_contents
            locals()[variable_name] = arr
    else:
        locals()[variable_name] = file_contents

DISTWALK None C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\nonmotskm.omx 10796411595951.248 0.03142102696417673 1000000.0
DISTBIKE None C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\nonmotskm.omx 8910510616075.47 0.018911417199612486 1000000.0
SOV_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 13965159000.0 0.114028856 1000000.0
SOV_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 13847489000.0 0.035375483 1000000.0
SOV_BTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 800401540.0 0.0 472.0
SOV_VTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 740640100.0 0.0 334.74524
HOV2_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 13957910000.0 0.114028856 1000000.0
HOV2_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\hwyskmEA.omx 13843948000.0 0.035375483 1000000.0
HOV2_BTOLL 1 C:\MTC_tmpy\TM

WLK_TRN_WLK_IVT_COM 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 10734498000.0 0.0 16516.65
WLK_TRN_WLK_FAR 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 5024411600.0 0.0 1631.282
WLK_TRN_WLK_WAUX 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 21836663000.0 0.0 105125.07
WLK_TRN_WLK_IWAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 7644272600.0 0.0 4500.0
WLK_TRN_WLK_XWAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 8024541700.0 0.0 10121.753
WLK_TRN_WLK_BOARDS 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_WLK.omx 23023680.0 0.0 7.625
WLK_TRN_WLK_WAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_WLK.omx 16956155000.0 0.0 20288.414
WLK_TRN_WLK_TOTIVT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trns

PNR_TRN_WLK_WAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 12545652000.0 0.0 8847.0
PNR_TRN_WLK_TOTIVT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 57901363000.0 0.0 30551.26
PNR_TRN_WLK_CROWD 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 130149370.0 0.0 652.9543
PNR_TRN_WLK_IVT_LOC 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 6788387000.0 0.0 13043.811
PNR_TRN_WLK_IVT_EXP 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 7820240400.0 0.0 15081.238
PNR_TRN_WLK_IVT_LRT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 1525240800.0 0.0 7407.6797
PNR_TRN_WLK_IVT_FRY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_PNR_TRN_WLK.omx 474548450.0 0.0 3342.4675
PNR_TRN_WLK_IVT_HVY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_33

PNR_TRN_WLK_XWAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmPM_PNR_TRN_WLK.omx 11754125000.0 0.0 10839.6045
PNR_TRN_WLK_BOARDS 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmPM_PNR_TRN_WLK.omx 19335464.0 0.0 6.545499
PNR_TRN_WLK_WAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_PNR_TRN_WLK.omx 14291795000.0 0.0 16097.0
PNR_TRN_WLK_TOTIVT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_PNR_TRN_WLK.omx 59531050000.0 0.0 33616.043
PNR_TRN_WLK_CROWD 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_PNR_TRN_WLK.omx 1342422000.0 0.0 1873.5887
PNR_TRN_WLK_IVT_LOC 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_PNR_TRN_WLK.omx 7747350000.0 0.0 15463.069
PNR_TRN_WLK_IVT_EXP 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_PNR_TRN_WLK.omx 6728489000.0 0.0 15865.598
PNR_TRN_WLK_IVT_LRT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3

KNR_TRN_WLK_WAUX 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmMD_KNR_TRN_WLK.omx 5356493000.0 0.0 31279.443
KNR_TRN_WLK_IWAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmMD_KNR_TRN_WLK.omx 7520908300.0 0.0 15000.0
KNR_TRN_WLK_XWAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmMD_KNR_TRN_WLK.omx 3044543500.0 0.0 7416.6665
KNR_TRN_WLK_BOARDS 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmMD_KNR_TRN_WLK.omx 15897323.0 0.0 5.8448277
KNR_TRN_WLK_WAIT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmPM_KNR_TRN_WLK.omx 10436086000.0 0.0 13148.934
KNR_TRN_WLK_TOTIVT 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmPM_KNR_TRN_WLK.omx 56415360000.0 0.0 35430.1
KNR_TRN_WLK_CROWD 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmPM_KNR_TRN_WLK.omx 3022846500.0 0.0 2912.9648
KNR_TRN_WLK_IVT_LOC 4 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skim

WLK_TRN_PNR_DTIM 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx -3.1165005e+28 -4.3541666e+22 17462.188
WLK_TRN_PNR_DDIST 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx 8777722000.0 0.0 13152.639
WLK_TRN_PNR_WAUX 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx 4846228000.0 0.0 45028.35
WLK_TRN_PNR_IWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx 9927324000.0 0.0 14628.0
WLK_TRN_PNR_XWAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx 5320166000.0 0.0 9166.55
WLK_TRN_PNR_BOARDS 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmAM_WLK_TRN_PNR.omx 21082660.0 0.0 6.25
WLK_TRN_PNR_WAIT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmMD_WLK_TRN_PNR.omx 15594249000.0 0.0 24088.895
WLK_TRN_PNR_TOTIVT 3 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\s

WLK_TRN_KNR_FAR 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 3782395000.0 0.0 1512.8937
WLK_TRN_KNR_DTIM 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx -6.6548273e+28 -4.3541666e+22 19151.768
WLK_TRN_KNR_DDIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 12176952000.0 0.0 15059.788
WLK_TRN_KNR_WAUX 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 2186748000.0 0.0 42590.492
WLK_TRN_KNR_IWAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 8617054000.0 0.0 4000.0
WLK_TRN_KNR_XWAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 2362888700.0 0.0 8100.0
WLK_TRN_KNR_BOARDS 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEA_WLK_TRN_KNR.omx 15208645.0 0.0 6.5
WLK_TRN_KNR_WAIT 2 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\

WLK_TRN_KNR_IVT_HVY 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 20910006000.0 0.0 12817.644
WLK_TRN_KNR_IVT_COM 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 8364231700.0 0.0 16475.98
WLK_TRN_KNR_FAR 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 3678527200.0 0.0 1443.4027
WLK_TRN_KNR_DTIM 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx -4.8123625e+28 -4.3541666e+22 17424.92
WLK_TRN_KNR_DDIST 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 15604398000.0 0.0 13495.182
WLK_TRN_KNR_WAUX 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 2902196000.0 0.0 43570.203
WLK_TRN_KNR_IWAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibration_3332\skims\trnskmEV_WLK_TRN_KNR.omx 12600226000.0 0.0 9000.0
WLK_TRN_KNR_XWAIT 5 C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\calibrat

In [8]:
# change th 1000000.0 values in DISTWALK and DISTBIKE to 0
DISTWALK = np.where(DISTWALK == 1000000.0, 0, DISTWALK)
DISTBIKE = np.where(DISTBIKE == 1000000.0, 0, DISTBIKE)
PNR_TRN_WLK_DTIM[1] =  np.where(PNR_TRN_WLK_DTIM[1] < 0, 0, PNR_TRN_WLK_DTIM[1])
PNR_TRN_WLK_DTIM[2] =  np.where(PNR_TRN_WLK_DTIM[2] < 0, 0, PNR_TRN_WLK_DTIM[2])
PNR_TRN_WLK_DTIM[3] =  np.where(PNR_TRN_WLK_DTIM[3] < 0, 0, PNR_TRN_WLK_DTIM[3])
PNR_TRN_WLK_DTIM[4] =  np.where(PNR_TRN_WLK_DTIM[4] < 0, 0, PNR_TRN_WLK_DTIM[4])
PNR_TRN_WLK_DTIM[5] =  np.where(PNR_TRN_WLK_DTIM[5] < 0, 0, PNR_TRN_WLK_DTIM[5])

KNR_TRN_WLK_DTIM[1] =  np.where(KNR_TRN_WLK_DTIM[1] < 0, 0, KNR_TRN_WLK_DTIM[1])
KNR_TRN_WLK_DTIM[2] =  np.where(KNR_TRN_WLK_DTIM[2] < 0, 0, KNR_TRN_WLK_DTIM[2])
KNR_TRN_WLK_DTIM[3] =  np.where(KNR_TRN_WLK_DTIM[3] < 0, 0, KNR_TRN_WLK_DTIM[3])
KNR_TRN_WLK_DTIM[4] =  np.where(KNR_TRN_WLK_DTIM[4] < 0, 0, KNR_TRN_WLK_DTIM[4])
KNR_TRN_WLK_DTIM[5] =  np.where(KNR_TRN_WLK_DTIM[5] < 0, 0, KNR_TRN_WLK_DTIM[5])

WLK_TRN_PNR_DTIM[1] =  np.where(WLK_TRN_PNR_DTIM[1] < 0, 0, WLK_TRN_PNR_DTIM[1])
WLK_TRN_PNR_DTIM[2] =  np.where(WLK_TRN_PNR_DTIM[2] < 0, 0, WLK_TRN_PNR_DTIM[2])
WLK_TRN_PNR_DTIM[3] =  np.where(WLK_TRN_PNR_DTIM[3] < 0, 0, WLK_TRN_PNR_DTIM[3])
WLK_TRN_PNR_DTIM[4] =  np.where(WLK_TRN_PNR_DTIM[4] < 0, 0, WLK_TRN_PNR_DTIM[4])
WLK_TRN_PNR_DTIM[5] =  np.where(WLK_TRN_PNR_DTIM[5] < 0, 0, WLK_TRN_PNR_DTIM[5])

WLK_TRN_KNR_DTIM[1] =  np.where(WLK_TRN_KNR_DTIM[1] < 0, 0, WLK_TRN_KNR_DTIM[1])
WLK_TRN_KNR_DTIM[2] =  np.where(WLK_TRN_KNR_DTIM[2] < 0, 0, WLK_TRN_KNR_DTIM[2])
WLK_TRN_KNR_DTIM[3] =  np.where(WLK_TRN_KNR_DTIM[3] < 0, 0, WLK_TRN_KNR_DTIM[3])
WLK_TRN_KNR_DTIM[4] =  np.where(WLK_TRN_KNR_DTIM[4] < 0, 0, WLK_TRN_KNR_DTIM[4])
WLK_TRN_KNR_DTIM[5] =  np.where(WLK_TRN_KNR_DTIM[5] < 0, 0, WLK_TRN_KNR_DTIM[5])

In [9]:
# randomly check few matrix cores
#PNR_TRN_WLK_DDIST[4].sum()
#PNR_TRN_WLK_DDIST[2].sum()

In [9]:
# ct ramp has params.properties which has certain parameter values used in the utility equations. 
# Following function extracts these values.


def extract_property_values(file_path, variables):
    property_values = {}
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith('#'):
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()
                if key in variables:
                    property_values[key] = value
    return property_values

In [12]:
%%time
for purp in purpose:
    #print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    # Removing NAs
    uec_purp_params_prop = uec_purp.loc[~uec_purp['Token'].isna()]
    # extract the parameters that have % in in their names, clean up-remove % and replace . with _
    uec_purp_params_prop = uec_purp_params_prop.loc[(uec_purp_params_prop['Formula for variable'].str.contains('%')==True)]
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace('%', '') 
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace(".", "_")
    # read parameters file
    file_path = _join(ctramp_dir, 'input', 'params.properties')
    # extract list of parameters
    prop_variables = list(uec_purp_params_prop['Formula for variable'])
    prop_variables_tokens = list(uec_purp_params_prop['Token'])
    prop_variables = [x.replace('_', '.') for x in prop_variables]

    values = extract_property_values(file_path, prop_variables)
    
    # Create a dictionary to store the extracted values
    extracted_values = {}

    # Assign the extracted values to the dictionary
    for variable, value in values.items():
        extracted_values[variable] = value

    # Print the values from the extracted_values dictionary 
    for variable, value in extracted_values.items():
        #print(f'{variable}: {value}')
        exec(f'{variable.replace(".", "_")} = {value}')
    
    
    # Assign the values to tokens
    # example costInitialTaxi = %taxi.baseFare%
    for _, row in uec_purp_params_prop.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
            #print(f"Variable '{variable_name}' is defined.")
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue

    
    uec_purp_params = uec_purp.loc[~uec_purp['Formula for variable'].isna()]
    uec_purp_params = uec_purp_params.loc[~uec_purp_params['Token'].isna()]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('if')==True)]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('%')==True)]

    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].astype(str)
    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].str.replace('@', '')

    key_column = 'Token'
    value_column = 'Formula for variable'

    # Create dictionary from selected columns
    data_dict = {}

    for _, row in uec_purp_params.iterrows():
        key = row[key_column]
        value = row[value_column]

        # Handle values that are strings
        if isinstance(value, str):
            try:
                value = int(value)
                data_dict[key] = value
            except ValueError:
                try:
                    value = float(value)
                    data_dict[key] = value
                except ValueError:
                    pass

    #get all the parameters
    variables = data_dict

    for _, row in uec_purp_params.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue
    
    #break
    
    int_zone = 3332
    da_util = np.empty((5, int_zone, int_zone))
    sr2_util = np.empty((5, int_zone, int_zone))
    sr3_util = np.empty((5, int_zone, int_zone))
    wlk_util =  np.empty((5, int_zone, int_zone))
    bike_util = np.empty((5, int_zone, int_zone))
    wlk_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_pnr_util = np.empty((5, int_zone, int_zone))
    pnr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_knr_util = np.empty((5, int_zone, int_zone))
    knr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    taxi_util = np.empty((5, int_zone, int_zone))
    
    for tripPeriod in time_period:
        #for trip_mode in oth_modes:
        #uec_purp_mode = uec_purp_df.loc[uec_purp_df['Description'].str.contains(trip_mode)==True]
        #uec_purp_mode['Formula for variable'] = uec_purp_mode['Formula for variable'].str.replace('tripPeriod', str(period))
        #uec_purp_mode['formula_calculation'] = 
        util = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'),'w')

        #Drive alone
        util['DA'] = c_ivt*SOV_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'DA', " ", np.array(util['DA']).min(), np.array(util['DA']).max())

        #Shared ride 2
        util['SR2'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR2', " ", np.array(util['SR2']).min(), np.array(util['SR2']).max())

        #Shared ride 3
        util['SR3'] = c_ivt*HOV3_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR3', " ", np.array(util['SR3']).min(), np.array(util['SR3']).max())

        # Walk 
        util['WALK'] = (walk_dist<=1)* (c_walkTimeShort * np.minimum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) + \
                       (walk_dist>1)* (c_walkTimeLong * np.maximum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) 
        print(tripPeriod, purp, 'WALK', " ", np.array(util['WALK']).min(), np.array(util['WALK']).max())
        
        #Bike
        util['BIKE'] = (bike_dist<=6)*(c_bikeTimeShort* np.minimum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed)) + \
                       (bike_dist>6)*(c_bikeTimeLong* np.maximum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed))
        print(tripPeriod, purp, 'BIKE', " ", np.array(util['BIKE']).min(), np.array(util['BIKE']).max())
        
        
        #Walk transit Walk
        util['WLK_TRN_WLK'] =  c_ivt*WLK_TRN_WLK_IVT_LOC[tripPeriod]/100 + \
                            c_ivt_exp*WLK_TRN_WLK_IVT_EXP[tripPeriod]/100 + \
                            c_ivt_lrt*WLK_TRN_WLK_IVT_LRT[tripPeriod]/100 + \
                            c_ivt_ferry*WLK_TRN_WLK_IVT_FRY[tripPeriod]/100 + \
                            c_ivt_hvy*WLK_TRN_WLK_IVT_HVY[tripPeriod]/100 + \
                            c_ivt_com*WLK_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_wlk * np.maximum(WLK_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_WLK_WAUX[tripPeriod]/100
        
        print(tripPeriod, purp, 'WLK_TRN_WLK', " ", np.array(util['WLK_TRN_WLK']).min(), np.array(util['WLK_TRN_WLK']).max())
        
        # Walk Transit PNR - Inbound
        util['WLK_TRN_PNR'] =  c_ivt*WLK_TRN_PNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_PNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_PNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_PNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_PNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_PNR_XWAIT[tripPeriod]/100 + \
                            c_dtim*WLK_TRN_PNR_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_PNR_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_PNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_PNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_PNR', " ", np.array(util['WLK_TRN_PNR']).min(), np.array(util['WLK_TRN_PNR']).max())

        # PNR transit Walk - Outbound
        util['PNR_TRN_WLK'] =  c_ivt*PNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*PNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*PNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(PNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(PNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*PNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_dtim*PNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_waux*PNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(PNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'PNR_TRN_WLK', " ", np.array(util['PNR_TRN_WLK']).min(), np.array(util['PNR_TRN_WLK']).max())

        # Walk Transit KNR - Inbound
        util['WLK_TRN_KNR'] = c_ivt*WLK_TRN_KNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_KNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_KNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_KNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_KNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_KNR_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_KNR_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*WLK_TRN_KNR_DTIM[tripPeriod]/100 + \
                            c_waux*WLK_TRN_KNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_KNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_KNR', " ", np.array(util['WLK_TRN_KNR']).min(), np.array(util['WLK_TRN_KNR']).max())

        # KNR Transit Walk - Outbound
        util['KNR_TRN_WLK'] = c_ivt*KNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*KNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*KNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(KNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(KNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*KNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(KNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*KNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_waux*KNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(KNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'KNR_TRN_WLK', " ", np.array(util['KNR_TRN_WLK']).min(), np.array(util['KNR_TRN_WLK']).max())

        # taxi
        util['RIDEHAIL'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]  + c_ivt*1.5*taxi_wait_time
        print(tripPeriod, purp, 'RIDEHAIL', " ", np.array(util['RIDEHAIL']).min(), np.array(util['RIDEHAIL']).max())
        
    
        util.close()

1 Work DA   -22000.0 -0.002508635
1 Work SR2   -22000.0 -0.002508635
1 Work SR3   -22000.0 -0.002508635
1 Work WALK   -19.223921100079313 -0.0
1 Work BIKE   -47.724191393766105 -0.0
1 Work WLK_TRN_WLK   -54.10539 -0.0
1 Work WLK_TRN_PNR   -26.305851 0.0
1 Work PNR_TRN_WLK   -38.717144 0.0
1 Work WLK_TRN_KNR   -26.21711 0.0
1 Work KNR_TRN_WLK   -38.127213 0.0
1 Work RIDEHAIL   -22000.33 -0.10150863493420184
2 Work DA   -22000.0 -0.002520339
2 Work SR2   -22000.0 -0.002520339
2 Work SR3   -22000.0 -0.002520339
2 Work WALK   -19.223921100079313 -0.0
2 Work BIKE   -47.724191393766105 -0.0
2 Work WLK_TRN_WLK   -34.29956 -0.0
2 Work WLK_TRN_PNR   -29.593216 0.0
2 Work PNR_TRN_WLK   -18.554993 0.0
2 Work WLK_TRN_KNR   -29.443739 0.0
2 Work KNR_TRN_WLK   -15.397717 0.0
2 Work RIDEHAIL   -22000.33 -0.10152033909782768
3 Work DA   -22000.0 -0.002536666
3 Work SR2   -22000.0 -0.002536666
3 Work SR3   -22000.0 -0.002536666
3 Work WALK   -19.223921100079313 -0.0
3 Work BIKE   -47.724191393766105 -0

4 Escort PNR_TRN_WLK   -23.283047 0.0
4 Escort WLK_TRN_KNR   -35.969284 0.0
4 Escort KNR_TRN_WLK   -23.042349 0.0
4 Escort RIDEHAIL   -27900.4185 -0.1287432785641402
5 Escort DA   -27900.0 -0.00318307
5 Escort SR2   -27900.0 -0.00318307
5 Escort SR3   -27900.0 -0.00318307
5 Escort WALK   -24.379427213282405 -0.0
5 Escort BIKE   -60.52295181300339 -0.0
5 Escort WLK_TRN_WLK   -58.266106 -0.0
5 Escort WLK_TRN_PNR   -41.857964 0.0
5 Escort PNR_TRN_WLK   -37.605713 0.0
5 Escort WLK_TRN_KNR   -37.50794 0.0
5 Escort KNR_TRN_WLK   -33.73477 0.0
5 Escort RIDEHAIL   -27900.4185 -0.1287330701045692
1 Shopping DA   -27900.0 -0.003181405
1 Shopping SR2   -27900.0 -0.003181405
1 Shopping SR3   -27900.0 -0.003181405
1 Shopping WALK   -24.379427213282405 -0.0
1 Shopping BIKE   -60.52295181300339 -0.0
1 Shopping WLK_TRN_WLK   -67.72034 -0.0
1 Shopping WLK_TRN_PNR   -33.360603 0.0
1 Shopping PNR_TRN_WLK   -49.10038 0.0
1 Shopping WLK_TRN_KNR   -33.24806 0.0
1 Shopping KNR_TRN_WLK   -48.352234 0.0
1 Shop

3 Social SR3   -27900.0 -0.003216954
3 Social WALK   -24.379427213282405 -0.0
3 Social BIKE   -60.52295181300339 -0.0
3 Social WLK_TRN_WLK   -55.0562 -0.0
3 Social WLK_TRN_PNR   -35.17603 0.0
3 Social PNR_TRN_WLK   -34.156662 0.0
3 Social WLK_TRN_KNR   -34.660194 0.0
3 Social KNR_TRN_WLK   -31.285757 0.0
3 Social RIDEHAIL   -27900.4185 -0.12876695394814014
4 Social DA   -27900.0 -0.0031932786
4 Social SR2   -27900.0 -0.0031932786
4 Social SR3   -27900.0 -0.0031932786
4 Social WALK   -24.379427213282405 -0.0
4 Social BIKE   -60.52295181300339 -0.0
4 Social WLK_TRN_WLK   -39.1222 -0.0
4 Social WLK_TRN_PNR   -36.362347 0.0
4 Social PNR_TRN_WLK   -23.283047 0.0
4 Social WLK_TRN_KNR   -35.969284 0.0
4 Social KNR_TRN_WLK   -23.042349 0.0
4 Social RIDEHAIL   -27900.4185 -0.1287432785641402
5 Social DA   -27900.0 -0.00318307
5 Social SR2   -27900.0 -0.00318307
5 Social SR3   -27900.0 -0.00318307
5 Social WALK   -24.379427213282405 -0.0
5 Social BIKE   -60.52295181300339 -0.0
5 Social WLK_TRN_W

In [None]:
util.close()

In [6]:
# add mappings from time period and purpose
df_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
len(df_trips)

36011057

In [15]:
# inbound trips get orig purpose, outbound trips get dest purpose
df_trips['util_purpose'] = np.where(df_trips['inbound']==1, df_trips['orig_purpose'], df_trips['dest_purpose'])

In [14]:
#df_trips['util_purpose'].value_counts()

In [16]:
purp_dict = { 'work' : 'Work', 
              'shopping' : 'Shopping',
              'escort' : 'Escort', 
              'othdiscr': 'OthDiscr',
              'othmaint': 'OthMaint',
              'school' : 'School', 
              'eatout' : 'EatOut', 
              'atwork' : 'WorkBased', 
              'social' : 'Social',
              'university' : 'University'}

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

In [17]:
df_trips['util_purpose'] = df_trips['util_purpose'].map(purp_dict)

In [18]:
%%time

import pandas as pd
import itertools

num_zones = 3332

for tripPeriod, value in time_period.items():

    for purp in purpose:
        print(f'Analyzing purpose: {purp} and time period: {value}')
        #df_temp = df_trips.query(f"util_purpose == {purp} and Period == {value.lower()}")
        #df_temp = df_trips.query(f"util_purpose == '{purp}' and Period == '{value.lower}'")
        df_temp = df_trips.loc[(df_trips['util_purpose'] == purp) & (df_trips['Period'] == value.lower())]
        
        # Generate all combinations of orig and dest
        combinations = list(itertools.product(range(1, num_zones + 1), repeat=2))

        # Create the DataFrame with orig and dest columns
        purp_df = pd.DataFrame(combinations, columns=['orig', 'dest'])

        # read utility files
        util_file = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'))

        for core in util_file.list_matrices():
            print(f'extracting {core} core form utility file')
            mode_core = np.array(util_file[core])
            mode_core = np.where(mode_core == 0, -999, mode_core)
            skm_df = pd.DataFrame(mode_core)
            skm_df = pd.melt(skm_df.reset_index(), id_vars='index', value_vars=skm_df.columns)
            skm_df['index'] = skm_df['index'] + 1
            skm_df['variable'] = skm_df['variable'] + 1
            skm_df.columns = ['orig', 'dest', core]
            purp_df = pd.merge(purp_df, skm_df, on=['orig', 'dest'], how='left')
        
        df_temp = pd.merge(df_temp, purp_df, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        
        print(f'writing the trip file for purpose : {purp} and time period: {value}', df_temp.shape)
        df_temp.to_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        
        #break

Analyzing purpose: Work and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Work and time period: EA (294523, 40)
Analyzing purpose: University and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility f

Analyzing purpose: Shopping and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Shopping and time period: AM (734020, 40)
Analyzing purpose: EatOut and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utili

Analyzing purpose: OthDiscr and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : OthDiscr and time period: MD (1078473, 40)
Analyzing purpose: WorkBased and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form u

extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : School and time period: EV (136797, 40)
Analyzing purpose: Escort and time period: EV
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility f

In [36]:
# combine all trips into one file

# read trip files
final_trips = []

for tripPeriod, value in time_period.items():
    for purp in purpose:
        temp = pd.read_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        final_trips.append(temp)
        
final_trips = pd.concat(final_trips)
len(final_trips) 

36011057

In [None]:
# special rules

# Rule 1: Utility value of Drive Alone for all joint trips should be -999

#final_trips.loc[final_trips['trip_type'] == 'JNT', 'DA'] = -999

# Inbound and Outbound conditions for PNR # ot PNR_TRN_WLK it WLK_TRN_PNR
#final_trips.loc[(final_trips['trip_mode'] == 7) & (final_trips['inbound'] == 0), 'WLK_TRN_PNR'] = -999
#final_trips.loc[(final_trips['trip_mode'] == 7) & (final_trips['inbound'] == 1), 'PNR_TRN_WLK'] = -999

# Inbound and outbound conditions for KNR
#final_trips.loc[(final_trips['trip_mode'] == 8) & (final_trips['inbound'] == 0), 'WLK_TRN_KNR'] = -999
#final_trips.loc[(final_trips['trip_mode'] == 8) & (final_trips['inbound'] == 1), 'KNR_TRN_PNR'] = -999

# TourMode Availability
# all teh conditions 

# 

### Calculate the logsums

In [37]:
auto_nesting_coef = 0.72
trn_nesting_coef = 0.72
nm_nest_coef = 0.72
ridehail_nest_coef = 1

In [38]:
# Create logsum

final_trips['auto_ls'] = auto_nesting_coef * (np.log(np.exp(final_trips['DA']/auto_nesting_coef) + 
                                                     np.exp(final_trips['SR2']/auto_nesting_coef) + 
                                                     np.exp(final_trips['SR3']/auto_nesting_coef)))


final_trips['exp_trn'] = np.exp(final_trips['WLK_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_PNR']/trn_nesting_coef) + \
                               np.exp(final_trips['PNR_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_KNR']/trn_nesting_coef) + \
                               np.exp(final_trips['KNR_TRN_WLK']/trn_nesting_coef)

final_trips['trn_ls'] = np.where(final_trips['exp_trn'] > 0, trn_nesting_coef *(np.log(final_trips['exp_trn'])), 0)


final_trips['exp_nm'] = np.exp(final_trips['WALK']/nm_nest_coef) + \
                                  np.exp(final_trips['BIKE']/nm_nest_coef)
    
final_trips['non_mot_ls'] = np.where(final_trips['exp_nm'] > 0, nm_nest_coef * (np.log(final_trips['exp_nm'])), 0)

final_trips['exp_ridehail'] = np.exp(final_trips['RIDEHAIL']/ridehail_nest_coef)
final_trips['ridehail_ls'] = np.where(final_trips['exp_ridehail']>0, ridehail_nest_coef * (np.log(final_trips['exp_ridehail'])),0)

final_trips['allmode_ls'] = np.log(np.exp(final_trips['auto_ls']) + 
                                np.exp(final_trips['trn_ls']) + 
                                np.exp(final_trips['non_mot_ls']) + 
                                np.exp(final_trips['ridehail_ls']))

In [15]:
# Create logsum

In [39]:
final_trips['sum_ls'] = np.exp(final_trips['auto_ls']) + np.exp(final_trips['trn_ls']) +  np.exp(final_trips['non_mot_ls']) + np.exp(final_trips['ridehail_ls'])

In [40]:
#final_trips[final_trips['allmode_ls']<0][1:10]

### Get BETA IVT values for each purpose from UEC sheet

In [41]:
# get beta IVT for each purpose
ivt_purp = pd.DataFrame(columns=['util_purpose'])

for purp in purpose:
    print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    ivt = uec_purp.loc[uec_purp['Token']=='c_ivt', 'Formula for variable'].item()
    #ivt_lrt = uec_purp.loc[uec_purp['Token']=='c_ivt_lrt', 'Formula for variable'].item()
    #ivt_ferry = uec_purp.loc[uec_purp['Token']=='c_ivt_ferry', 'Formula for variable'].item()
    #ivt_exp = uec_purp.loc[uec_purp['Token']=='c_ivt_exp', 'Formula for variable'].item()
    #ivt_hvy = uec_purp.loc[uec_purp['Token']=='c_ivt_hvy', 'Formula for variable'].item()
    #ivt_com = uec_purp.loc[uec_purp['Token']=='c_ivt_com', 'Formula for variable'].item()
    
    ivt_purp = ivt_purp.append({'util_purpose': purp, 'b_ivt': ivt #'b_ivt_lrt': ivt_lrt,
                                #'b_ivt_ferry' : ivt_ferry, 'b_ivt_exp': ivt_exp, 
                                #'b_ivt_hvy': ivt_hvy, 'b_ivt_com': ivt_com
                               }, ignore_index=True)

Work
University
School
Escort
Shopping
EatOut
OthMaint
Social
OthDiscr
WorkBased


In [42]:
# merge with trip roster
final_trips = pd.merge(final_trips, ivt_purp, on = 'util_purpose', how='left')

In [43]:
# logsum benefits auto
final_trips['ls_benefit_auto'] =  (final_trips['allmode_ls'] * \
                                    (np.exp(final_trips['auto_ls'])/final_trips['sum_ls']) * \
                                        (final_trips['trips']/final_trips['b_ivt'])) #

In [44]:
# logsum benefits transit
final_trips['ls_benefit_transit'] = (final_trips['allmode_ls'] * \
                                         (np.exp(final_trips['trn_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* 

In [45]:
# logsum benefits ridehail
final_trips['ls_benefit_raidehail'] = (final_trips['allmode_ls'] * \
                                         (np.exp(final_trips['ridehail_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* final_trips['trips']

In [46]:
# logsum benefits non-motorized
final_trips['ls_benefit_nm'] =  (final_trips['allmode_ls'] * \
                                         (np.exp(final_trips['non_mot_ls'])/final_trips['sum_ls']) * \
                                            (final_trips['trips']/final_trips['b_ivt']))  #* final_trips['trips']

In [50]:
final_trips.columns

Index(['hh_id', 'person_id', 'inbound', 'orig_purpose', 'dest_purpose',
       'orig_taz', 'dest_taz', 'depart_hour', 'trip_mode', 'sampleRate',
       'trip_type', 'trips', 'transbay_od', 'orig_rdm_zones',
       'orig_super_dist', 'orig_county', 'dest_rdm_zones', 'dest_super_dist',
       'dest_county', 'home_zone', 'income', 'Income', 'pp_share',
       'link21_trip_purp', 'Period', 'Mode', 'util_purpose', 'orig', 'dest',
       'BIKE', 'DA', 'KNR_TRN_WLK', 'PNR_TRN_WLK', 'RIDEHAIL', 'SR2', 'SR3',
       'WALK', 'WLK_TRN_KNR', 'WLK_TRN_PNR', 'WLK_TRN_WLK', 'auto_ls',
       'exp_trn', 'trn_ls', 'exp_nm', 'non_mot_ls', 'exp_ridehail',
       'ridehail_ls', 'allmode_ls', 'sum_ls', 'b_ivt', 'ls_benefit_auto',
       'ls_benefit_transit', 'ls_benefit_raidehail', 'ls_benefit_nm'],
      dtype='object')

In [20]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['allmode_ls'].mean().reset_index()
super_dist.to_csv("super_dist_composite_utility_baseline2050.csv", index=False)

In [30]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['allmode_ls'].mean().reset_index()
super_dist.to_csv("super_dist_composite_utility_baseline2050_2.csv", index=False)

In [31]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['sum_ls'].mean().reset_index()
super_dist.to_csv("super_dist_composite_utility_baseline2050_3.csv", index=False)

In [32]:
# superdistrict average composite utility
super_dist = final_trips.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trips'].sum().reset_index()
super_dist.to_csv("super_dist_trips_baseline.csv")

In [44]:
bl = bl.add_suffix("_bl")
r39 = r39.add_suffix("_r39")

In [45]:
taz_df = pd.merge(bl, r39, left_on=['orig_taz_bl', 'dest_taz_bl', 'Period_bl'],
                 right_on = ['orig_taz_r39', 'dest_taz_r39', 'Period_r39'], how = 'left')

In [46]:
taz_df = taz_df.drop(columns=['orig_taz_r39', 'dest_taz_r39', 'Period_r39'])

In [48]:
taz_df['delta_trn'] = taz_df['ls_benefit_transit_r39'] - taz_df['ls_benefit_transit_bl']
taz_df['delta_auto'] = taz_df['ls_benefit_auto_r39'] - taz_df['ls_benefit_auto_bl']
taz_df['delta_rh'] = taz_df['ls_benefit_raidehail_r39'] - taz_df['ls_benefit_raidehail_bl']
taz_df['delta_nm'] = taz_df['ls_benefit_nm_r39'] - taz_df['ls_benefit_nm_bl']

In [49]:
taz_df = taz_df.fillna(0)

In [64]:
def create_finaldf(df, summarize_col = 'delta_auto', col_name='auto'):
    temp1 = df.groupby(['orig_taz_bl', 'Period_bl'])[summarize_col].sum().reset_index()
    temp1 = temp1.rename(columns={'orig_taz_bl': 'taz', 'summarize_col' : col_name})
    temp1 = pd.pivot(temp1, index='taz', columns='Period_bl', values=summarize_col)
    temp1 = temp1.add_suffix('_orig_' + col_name)
    
    temp2 = df.groupby(['dest_taz_bl', 'Period_bl'])[summarize_col].sum().reset_index()
    temp2 = temp2.rename(columns={'dest_taz_bl': 'taz'})
    temp2 = pd.pivot(temp2, index='taz', columns='Period_bl', values=summarize_col)
    temp2 = temp2.add_suffix('_dest_' + col_name)
    
    temp = temp1.merge(temp2, on = 'taz', how='left')
    return temp

In [65]:
auto = create_finaldf(taz_df, summarize_col = 'delta_auto', col_name='auto')
trn = create_finaldf(taz_df, summarize_col = 'delta_trn', col_name='trn')
rh = create_finaldf(taz_df, summarize_col = 'delta_rh', col_name='rh')
nm = create_finaldf(taz_df, summarize_col = 'delta_nm', col_name='nm')

In [66]:
final_df = auto.merge(trn, on='taz' , how='left').merge(
                    rh, on = 'taz', how='left').merge(
                    nm, on = 'taz', how='left')

In [67]:
final_df

Period_bl,am_orig_auto,ea_orig_auto,ev_orig_auto,md_orig_auto,pm_orig_auto,am_dest_auto,ea_dest_auto,ev_dest_auto,md_dest_auto,pm_dest_auto,...,am_orig_nm,ea_orig_nm,ev_orig_nm,md_orig_nm,pm_orig_nm,am_dest_nm,ea_dest_nm,ev_dest_nm,md_dest_nm,pm_dest_nm
taz,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-239.568130,0.029933,-7.694765,-73133.610320,2657.773086,2674.083728,-0.378113,221.732873,-1.885918e+06,1513.012489,...,155.308373,0.296733,9.866771,86451.276721,-2570.828065,-3809.924545,1.507447,-266.001211,2.834122e+06,-2097.497589
2,-12904.441474,-4891.800444,-28580.429724,432.965479,-4312.440663,-22605.627415,-19331.080720,-24513.129493,2.052860e+04,275.027471,...,15900.322403,7010.773136,43282.853783,-164.590328,3646.256265,27993.000054,22937.188877,32647.564538,-2.428496e+04,524.553331
3,-13875.937658,-6858.966451,-3730.051409,-1550.303217,27038.693943,-2593.489995,-218779.651228,-1990.303405,2.899025e+03,-200492.527249,...,20141.023152,8338.742153,4252.224056,-5031.211392,-36259.136405,3059.649421,289547.117749,4209.628798,-3.288529e+03,267815.870974
4,37743.994938,-83.470182,-2043.479495,97.181312,1054.839201,13542.790425,-6138.441654,-33828.107202,3.046952e+04,1765.586707,...,-40511.452358,121.453167,-2208.339562,-249.044583,-1377.427222,-18978.404002,-0.031302,2501.522086,-4.333135e+04,-2466.131325
5,-46828.871364,-984.102795,-141380.515496,-12351.563856,15039.714424,8139.829698,508913.856571,-4482.700212,-1.428478e+03,15203.105996,...,66820.530642,1234.919844,212596.991607,21212.725169,-19111.756774,-14220.932009,-793714.305080,7030.352921,2.666760e+03,-21836.710699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3328,-10.603963,-0.021244,-0.853735,1.171191,2.057848,2.484099,0.019212,-0.876731,-4.848381e+00,65.701588,...,-82.290782,-1.490387,3.491575,-2.916510,-4.063941,-2.013414,0.298852,-0.030814,8.034692e+00,-269.251244
3329,268.715985,0.037999,8797.376236,324.268970,-8101.625370,429.362453,-0.678482,96.042872,3.443828e+03,477.391044,...,-175.572451,-1.359753,21.517912,-25.842932,19.451375,-169.724392,1.504056,-0.477972,-4.094457e+01,-6.892315
3330,-114.174950,17.217313,129.932606,43.939800,-112.823626,482.616410,1386.207143,421.666729,3.949421e+02,99081.133906,...,70.974397,-11.611042,-80.879281,64.092999,100.188457,-327.861585,-979.033905,-12.977589,-2.140420e+02,-72268.271222
3331,-4.463736,-7.304887,176.329860,5339.984559,6.269887,-80.699623,467.076112,148.931111,1.075127e+02,102.807992,...,-10.639822,5.762648,-133.115402,-5909.207679,3.400789,141.360094,-385.026363,-4.717116,-1.186447e+01,-130.183439


In [68]:
final_df.to_csv("benefits_by_taz.csv")

### Add Travel Time to Trip Roster

In [17]:
time_periods = params['periods']

#auto_trips = final_trips.loc[final_trips['trip_mode'].isin([1,2,3,9])]
#trn_trips = final_trips.loc[final_trips['trip_mode'].isin([6,7,8])]
#nm_trips = final_trips.loc[final_trips['trip_mode'].isin([4,5])]

In [18]:
from itertools import product

# Define the values for the first two columns
column1 = range(1, 3333)
column2 = range(1, 3333)

# Create a list of all combinations
combinations = list(product(column1, column2))
tt_auto = pd.DataFrame(combinations, columns=['orig', 'dest'])

In [23]:
%%time
#highway skims
periods = ['AM', 'MD', 'PM', 'EA', 'EV']
cores = ['TIMEDAH']

#tt_auto = []
for period in periods:
    for core in cores:
        print(period, core)
        hwy_skim = omx.open_file(_join(hwy_skims_dir, 'HWYSKM' + period +'.omx'))
        time_od = skim_core_to_df(hwy_skim, core, cols =['orig', 'dest', core+'_'+period])
        tt_auto = pd.merge(tt_auto, time_od, on = ['orig', 'dest'], how='left')
        
tt_auto.to_parquet(_join(preprocess_dir, 'auto_travel_time.parquet'))
tt_auto.columns=['orig_taz', 'dest_taz', 'AM', 'MD', 'PM', 'EA', 'EV']

AM TIMEDAH
MD TIMEDAH
PM TIMEDAH
EA TIMEDAH
EV TIMEDAH
Wall time: 49.9 s


In [24]:
%%time
tt_auto = pd.read_parquet(_join(preprocess_dir, 'auto_travel_time.parquet'))
tt_auto.columns=['orig_taz', 'dest_taz', 'AM', 'MD', 'PM', 'EA', 'EV']

print(len(final_trips))
auto_temp = []

for period in time_periods:
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    print(period, len(trips_pd))
    tt_pd = tt_auto[['orig_taz', 'dest_taz', period.upper()]]
    auto_trips_pd = pd.merge(trips_pd, tt_pd, 
                        on =['orig_taz', 'dest_taz'], how ='left')
    
    auto_trips_pd = auto_trips_pd.rename(columns={period.upper(): 'auto_time'})
    auto_temp.append(auto_trips_pd)

final_trips = pd.concat(auto_temp)
print(len(final_trips))

36011057
am 8461510
md 10621188
pm 10812800
ev 5545409
ea 570150
36011057
Wall time: 3min 18s


In [25]:
%%time
# Create a list of all combinations
combinations = list(product(column1, column2))
tt_nm = pd.DataFrame(combinations, columns=['orig', 'dest'])

nm_skm = omx.open_file(_join(model_outputs_dir, r'skims\active\nonmotskm.omx'))
time_od = skim_core_to_df(nm_skm, 'DISTWALK', cols =['orig', 'dest', 'DISTWALK'])
tt_nm = pd.merge(tt_nm, time_od, on = ['orig', 'dest'], how='left')

time_od = skim_core_to_df(nm_skm, 'DISTBIKE', cols =['orig', 'dest', 'DISTBIKE'])
tt_nm = pd.merge(tt_nm, time_od, on = ['orig', 'dest'], how='left')

tt_nm.loc[tt_nm['DISTBIKE']==1000000.0, 'DISTBIKE'] = 0
tt_nm.loc[tt_nm['DISTWALK']==1000000.0, 'DISTWALK'] = 0

tt_nm['biketime'] = tt_nm['DISTBIKE'] * 60 / 12
tt_nm['walktime'] = tt_nm['DISTWALK'] * 60 / 3

tt_nm = tt_nm.drop(columns=['DISTBIKE', 'DISTWALK'])


tt_nm.to_parquet(_join(preprocess_dir, 'nm_travel_time.parquet'))

Wall time: 27.9 s


In [26]:
# Non Motorized mode travel time
tt_nm = pd.read_parquet(_join(preprocess_dir, 'nm_travel_time.parquet'))
tt_nm.columns = ['orig_taz', 'dest_taz', 'biketime', 'walktime']


#Merge with trip roster
final_trips = pd.merge(final_trips, tt_nm, on=['orig_taz', 'dest_taz'], how='left')
print(len(final_trips))

36011057


In [27]:
%%time

#create IVT values for all transit options

acc_egg_modes = params['access_egress_modes']
#create attributes for all time periods and access and egress modes

for acc_egr in acc_egg_modes:
    combinations = list(product(column1, column2))
    tt_trn = pd.DataFrame(combinations, columns=['orig', 'dest'])
    for period in time_periods:
        file_name = _join(model_outputs_dir, r'skims\transit', 'trnskm' + period.lower()+'_'+acc_egr+'.omx')
        print(file_name)
        skim = omx.open_file(file_name)
        ivt = skim_core_to_df(skim, 'IVT', cols =['orig', 'dest', period.lower()])
        tt_trn = pd.merge(tt_trn, ivt, on = ['orig', 'dest'], how='left')
        skim.close()
        
    tt_trn.to_parquet(_join(preprocess_dir, 'trnskm' +'_'+ acc_egr +'_cores.parquet'))

C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmam_WLK_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmmd_WLK_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmpm_WLK_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmev_WLK_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmea_WLK_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmam_KNR_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmmd_KNR_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmpm_KNR_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\transit\trnskmev_KNR_TRN_WLK.omx
C:\MTC_tmpy\TM2_2050Baseline_R2_Run3\tm2py\examples\Link21_3332\skims\tra

In [28]:
%%time
print(len(final_trips))
acc_egr = 'WLK_TRN_WLK'
df_trn_skim = pd.read_parquet(_join(preprocess_dir, 'trnskm_'+acc_egr+'_cores.parquet'))

temp = []
for period in time_periods:
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    tt_pd = df_trn_skim[['orig', 'dest', period.lower()]]
    tt_pd.columns = ['orig_taz', 'dest_taz', period.lower()]
    tt_trips_pd = pd.merge(trips_pd, tt_pd, 
                           on =['orig_taz', 'dest_taz'], 
                            how ='left')

    tt_trips_pd = tt_trips_pd.rename(columns={period.lower(): acc_egr+'_time'})
    temp.append(tt_trips_pd)

final_trips = pd.concat(temp)
print(len(final_trips))

36011057
36011057
Wall time: 3min 34s


In [29]:
%%time
print(len(final_trips))
acc_egr = 'KNR_TRN_WLK'
df_trn_skim = pd.read_parquet(_join(preprocess_dir, 'trnskm_'+acc_egr+'_cores.parquet'))

temp = []
for period in time_periods:
    print(period)
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    tt_pd = df_trn_skim[['orig', 'dest', period.lower()]]
    tt_pd.columns = ['orig_taz', 'dest_taz', period.lower()]
    tt_trips_pd = pd.merge(trips_pd, tt_pd, 
                           on =['orig_taz', 'dest_taz'], 
                            how ='left')

    tt_trips_pd = tt_trips_pd.rename(columns={period.lower(): acc_egr+'_time'})
    temp.append(tt_trips_pd)

final_trips = pd.concat(temp)
print(len(final_trips))

36011057
am
md
pm
ev
ea
36011057
Wall time: 3min 38s


In [30]:
%%time
print(len(final_trips))
acc_egr = 'WLK_TRN_KNR'
df_trn_skim = pd.read_parquet(_join(preprocess_dir, 'trnskm_'+acc_egr+'_cores.parquet'))

temp = []
for period in time_periods:
    print(period)
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    tt_pd = df_trn_skim[['orig', 'dest', period.lower()]]
    tt_pd.columns = ['orig_taz', 'dest_taz', period.lower()]
    tt_trips_pd = pd.merge(trips_pd, tt_pd, 
                           on =['orig_taz', 'dest_taz'], 
                            how ='left')

    tt_trips_pd = tt_trips_pd.rename(columns={period.lower(): acc_egr+'_time'})
    temp.append(tt_trips_pd)

final_trips = pd.concat(temp)
print(len(final_trips))

36011057
am
md
pm
ev
ea
36011057
Wall time: 3min 44s


In [31]:
%%time
print(len(final_trips))
acc_egr = 'PNR_TRN_WLK'
df_trn_skim = pd.read_parquet(_join(preprocess_dir, 'trnskm_'+acc_egr+'_cores.parquet'))

temp = []
for period in time_periods:
    print(period)
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    tt_pd = df_trn_skim[['orig', 'dest', period.lower()]]
    tt_pd.columns = ['orig_taz', 'dest_taz', period.lower()]
    tt_trips_pd = pd.merge(trips_pd, tt_pd, 
                           on =['orig_taz', 'dest_taz'], 
                            how ='left')

    tt_trips_pd = tt_trips_pd.rename(columns={period.lower(): acc_egr+'_time'})
    temp.append(tt_trips_pd)

final_trips = pd.concat(temp)
print(len(final_trips))

36011057
am
md
pm
ev
ea
36011057
Wall time: 3min 46s


In [32]:
%%time
print(len(final_trips))
acc_egr = 'WLK_TRN_PNR'
df_trn_skim = pd.read_parquet(_join(preprocess_dir, 'trnskm_'+acc_egr+'_cores.parquet'))

temp = []
for period in time_periods:
    print(period)
    trips_pd = final_trips.loc[final_trips['Period']==period.lower()]
    tt_pd = df_trn_skim[['orig', 'dest', period.lower()]]
    tt_pd.columns = ['orig_taz', 'dest_taz', period.lower()]
    tt_trips_pd = pd.merge(trips_pd, tt_pd, 
                           on =['orig_taz', 'dest_taz'], 
                            how ='left')

    tt_trips_pd = tt_trips_pd.rename(columns={period.lower(): acc_egr+'_time'})
    temp.append(tt_trips_pd)

final_trips = pd.concat(temp)
print(len(final_trips))

36011057
am
md
pm
ev
ea
36011057
Wall time: 3min 48s


In [33]:
final_trips.to_parquet(_join(preprocess_dir, 'trips_ls.parquet'))

In [5]:
final_trips = pd.read_parquet(_join(preprocess_dir, 'trips_ls.parquet'))

In [6]:
final_trips.columns

Index(['hh_id', 'person_id', 'inbound', 'orig_purpose', 'dest_purpose',
       'orig_taz', 'dest_taz', 'depart_hour', 'trip_mode', 'sampleRate',
       'trip_type', 'trips', 'transbay_od', 'orig_rdm_zones',
       'orig_super_dist', 'orig_county', 'dest_rdm_zones', 'dest_super_dist',
       'dest_county', 'home_zone', 'income', 'Income', 'pp_share',
       'link21_trip_purp', 'Period', 'Mode', 'util_purpose', 'orig', 'dest',
       'BIKE', 'DA', 'KNR_TRN_WLK', 'PNR_TRN_WLK', 'RIDEHAIL', 'SR2', 'SR3',
       'WALK', 'WLK_TRN_KNR', 'WLK_TRN_PNR', 'WLK_TRN_WLK', 'auto_ls',
       'exp_trn', 'trn_ls', 'exp_nm', 'non_mot_ls', 'exp_ridehail',
       'ridehail_ls', 'allmode_ls', 'sum_ls', 'b_ivt', 'ls_benefit_auto',
       'ls_benefit_transit', 'ls_benefit_raidehail', 'ls_benefit_nm',
       'auto_time', 'biketime', 'walktime', 'WLK_TRN_WLK_time',
       'KNR_TRN_WLK_time', 'WLK_TRN_KNR_time', 'PNR_TRN_WLK_time',
       'WLK_TRN_PNR_time'],
      dtype='object')

In [133]:
chk  = final_trips[(final_trips['orig_taz'] == 2228) & (final_trips['dest_taz'] == 3017)]
chk['scenario'] = 'baseline'
chk.to_csv("chk_baseline.csv")

In [35]:
final_trips.groupby(['trip_mode'])['trips'].sum()

trip_mode
1    13850189.0
2     7735766.0
3     5138734.0
4     5113895.0
5     2107952.0
6     2118686.0
7      378038.0
8      283292.0
9      421593.0
Name: trips, dtype: float64

In [None]:
#421000

In [55]:
import pandas as pd
pd.options.display.float_format = '{:,.3f}'.format

In [56]:
summary_col = 'ls_benefit_transit'
mode_numbers = [6,7,8]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
trn = df_region_period[['Period', 'Value']]
trn.columns = ['Period', 'trn']
#df_region_period.to_csv('trn_ls.csv')

summary_col = 'ls_benefit_auto'
mode_numbers = [1,2,3]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
auto = df_region_period[['Period', 'Value']]
auto.columns = ['Period', 'auto']
#df_region_period.to_csv('auto_ls.csv')

summary_col = 'ls_benefit_nm'
mode_numbers = [4,5]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
nm = df_region_period[['Period', 'Value']]
nm.columns = ['Period', 'nm']
#df_region_period.to_csv('nm_ls.csv')

summary_col = 'ls_benefit_raidehail'
mode_numbers = [9]
temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
df_region_period = summarize_all_combinations(temp, groupby_columns=['Period'], summary_column=summary_col)
rh = df_region_period[['Period', 'Value']]
rh.columns = ['Period', 'rh']
#df_region_period.to_csv('rh_ls.csv')

In [57]:
final = pd.merge(trn, auto, on = 'Period').merge(
                    nm, on='Period').merge(
                    rh, on='Period')

In [58]:
final.to_csv('ls_benefits.csv')

In [59]:
final

Unnamed: 0,Period,trn,auto,nm,rh
0,All,-43806271.31,-570180151.703,-70192528.376,-3645000.404
1,am,-12145049.65,-132205286.653,-15058432.685,-966416.307
2,ea,-1129129.338,-9607720.843,-625793.151,-53440.779
3,ev,-7623587.357,-91857701.75,-9319321.698,-581307.802
4,md,-9546558.001,-168592259.26,-24680848.954,-1025276.867
5,pm,-13361946.964,-167917183.197,-20508131.888,-1018558.649


In [85]:
test = final_trips.copy()

In [86]:
%%time
## Add distance 

hwy_skim = omx.open_file(_join(hwy_skims_dir, 'HWYSKMMD.omx'))
dist_od = skim_core_to_df(hwy_skim, 'DISTDAH', cols =['orig_taz', 'dest_taz', 'DISTDAH'])

Wall time: 4.8 s


In [87]:
combinations = list(product(column1, column2))
tt_nm = pd.DataFrame(combinations, columns=['orig_taz', 'dest_taz'])

nm_skm = omx.open_file(_join(model_outputs_dir, r'skims\active\nonmotskm.omx'))
time_od = skim_core_to_df(nm_skm, 'DISTWALK', cols =['orig_taz', 'dest_taz', 'DISTWALK'])
tt_nm = pd.merge(tt_nm, time_od, on = ['orig_taz', 'dest_taz'], how='left')

time_od = skim_core_to_df(nm_skm, 'DISTBIKE', cols =['orig_taz', 'dest_taz', 'DISTBIKE'])
tt_nm = pd.merge(tt_nm, time_od, on = ['orig_taz', 'dest_taz'], how='left')

tt_nm.loc[tt_nm['DISTBIKE']==1000000.0, 'DISTBIKE'] = 0
tt_nm.loc[tt_nm['DISTWALK']==1000000.0, 'DISTWALK'] = 0

In [88]:
test = pd.merge(test, dist_od, on=['orig_taz', 'dest_taz'], how='left')

In [89]:
test = pd.merge(test, tt_nm, on=['orig_taz', 'dest_taz'], how='left')

In [90]:
test['biketime'] = test['DISTBIKE'] * 60 / 12
test['walktime'] = test['DISTWALK'] * 60 / 3

In [78]:
test_auto = final_trips.loc[final_trips['trip_mode'].isin([1,2,3])]
test_auto = test_auto.sort_values(by='ls_benefit_auto')
test1 = test_auto[0:100]
test2 = test_auto.tail(100)

test3 = pd.concat([test1, test2])
test3.to_csv('check_auto_baseline.csv')

In [79]:
test_trn = final_trips.loc[final_trips['trip_mode'].isin([6,7,8])]
test_trn = test_trn.sort_values(by='ls_benefit_transit')
test1 = test_trn[0:100]
test2 = test_trn.tail(100)

test3 = pd.concat([test1, test2])
test3.to_csv('check_transit_baseline.csv')

In [91]:
test_nm = final_trips.loc[final_trips['trip_mode'].isin([4,5])]
test_nm = test_nm.sort_values(by='ls_benefit_nm')
test1 = test_nm[0:100]
test2 = test_nm.tail(100)

test3 = pd.concat([test1, test2])
test3.to_csv('check_nm_baseline.csv')

In [106]:
random_rows = test_nm.sample(n=500)
#random_200_rows.to_csv('check_nm_baseline.csv')

In [109]:
random_rows['od_id'] = [str(x) + '-' + str(y) for x,y in zip(random_rows['orig_taz'], random_rows['dest_taz'])] #str(random_200_rows['orig_taz']) + '-' + str(random_200_rows['dest_taz'])

In [110]:
random_rows

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,...,ls_benefit_nm,auto_time,biketime,walktime,WLK_TRN_WLK_time,KNR_TRN_WLK_time,WLK_TRN_KNR_time,PNR_TRN_WLK_time,WLK_TRN_PNR_time,od_id
6874210,1985680,4668551.000,1,eatout,Home,69,401,17,4,1.000,...,-10.985,1.309,7.313,1.828,0.000,269.218,269.218,950.095,2233.778,69-401
5931259,687468,1632432.000,0,social,eatout,1626,1635,12,4,1.000,...,13.414,2.220,15.729,3.932,0.000,287.252,460.535,1064.048,1138.641,1626-1635
7589352,172358,322367.000,1,eatout,othdiscr,2276,2154,18,4,1.000,...,119.278,5.230,25.975,6.494,567.975,177.586,127.670,615.387,619.970,2276-2154
8644735,701476,1672667.000,0,Home,othdiscr,1637,1707,11,5,1.000,...,-67.684,7.726,42.602,10.650,0.000,400.205,355.203,400.205,2690.128,1637-1707
3884769,2265328,5325021.000,0,othmaint,shopping,612,64,11,4,1.000,...,15.737,3.204,17.587,4.393,0.000,69.852,112.914,1103.020,1102.581,612-64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7115714,235732,446878.000,1,eatout,Home,2341,2316,16,4,1.000,...,151.100,3.706,23.840,5.646,469.265,466.312,249.950,500.247,469.265,2341-2316
5244835,2614659,6213205.000,1,othdiscr,othmaint,771,791,19,4,1.000,...,27.999,2.420,19.273,4.770,0.000,129.030,310.209,300.578,271.720,771-791
1963545,3441527,8288899.000,1,escort,Home,1272,1041,11,5,1.000,...,-81.491,8.589,51.112,11.564,1554.601,257.686,816.148,2099.059,703.493,1272-1041
5654864,2037023,4759552.000,0,Home,shopping,212,205,15,4,1.000,...,20.430,2.445,17.244,4.263,0.000,262.001,388.273,2041.461,2142.806,212-205


In [95]:
r39_run = pd.read_parquet('test_R39.parquet')

In [126]:
r39_run.isna().sum()

hh_id                      0
person_id             780177
inbound                    0
orig_purpose               0
dest_purpose               0
                      ...   
WLK_TRN_PNR_time           0
DISTDAH                    0
DISTWALK            35903741
DISTBIKE            35903741
od_id                      0
Length: 66, dtype: int64

In [111]:
test_nm39 = r39_run.loc[r39_run['trip_mode'].isin([4,5])]
test_nm39['od_id'] = [str(x) + '-' + str(y) for x,y in zip(test_nm39['orig_taz'], test_nm39['dest_taz'])] 

In [112]:
unique_od = list(random_rows['od_id'].unique())

In [113]:
unique_od

['69-401',
 '1626-1635',
 '2276-2154',
 '1637-1707',
 '612-64',
 '14-62',
 '1803-1801',
 '3015-2977',
 '835-808',
 '31-608',
 '544-595',
 '76-91',
 '1877-1877',
 '2653-2724',
 '2468-2567',
 '955-955',
 '1128-1241',
 '3109-3109',
 '2543-2543',
 '1366-1237',
 '1118-1118',
 '955-952',
 '73-37',
 '79-79',
 '2244-2085',
 '2352-2274',
 '2233-2164',
 '266-427',
 '2943-2939',
 '2992-2992',
 '697-681',
 '1790-1765',
 '1566-1566',
 '865-865',
 '2185-2018',
 '103-30',
 '3190-3190',
 '955-1012',
 '2205-2272',
 '2116-2116',
 '1046-1046',
 '2745-2728',
 '1070-1047',
 '86-17',
 '3124-3124',
 '3137-3137',
 '225-224',
 '587-576',
 '2812-2812',
 '1522-1308',
 '2291-2291',
 '214-217',
 '2290-2354',
 '2362-2417',
 '174-150',
 '1164-1164',
 '2542-2542',
 '3197-3197',
 '1422-1432',
 '413-339',
 '10-79',
 '600-600',
 '14-16',
 '2970-2970',
 '3130-3130',
 '2377-2403',
 '3183-3183',
 '1243-1243',
 '2775-2775',
 '3038-2987',
 '1330-1410',
 '2084-2084',
 '454-605',
 '1204-1191',
 '2689-2668',
 '192-192',
 '2855-

In [116]:
rows = test_nm39[test_nm39['od_id'].isin(unique_od)]

In [117]:
rows.shape

(1023479, 66)

In [118]:
rows = rows[0:500]

In [119]:
rows['scenario'] = 'R39'

In [120]:
random_rows['scenario'] = 'baseline2050'

In [121]:
final_test = pd.concat([rows, random_rows])

In [122]:
final_test.to_csv('check_nm_combined.csv')

In [124]:
final_test.isna().sum()

hh_id              0
person_id          1
inbound            0
orig_purpose       0
dest_purpose       0
                ... 
DISTDAH          500
DISTWALK        1000
DISTBIKE        1000
od_id              0
scenario           0
Length: 67, dtype: int64

## Creating Summaries

In [51]:
# Regional Value

def create_summaries(final_trips, summary_col, filename_verbose, metric_num, filename_extension, mode_numbers):
    
    temp = final_trips.loc[final_trips['trip_mode'].isin(mode_numbers)]
    # Region
    df_region_period = summarize_all_combinations(final_trips, groupby_columns=['Period'], summary_column=summary_col)
    df_region_period = df_region_period[['Period', 'Value']]

    df_region_period['Concept_ID'] = concept_id
    df_region_period['Metric_ID'] = metric_num
    df_region_period['Metric_name'] = 'Travel time savings'
    df_region_period['Submetric'] = metric_num + '.1'
    df_region_period['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_region_period['Population'] = 'Whole Population'
    df_region_period['Geography'] = 'Region'
    df_region_period['Zone_ID'] = ''
    df_region_period['Income'] = ''
    df_region_period['Mode'] = ''
    df_region_period['Purpose'] = ''
    df_region_period['Origin_zone'] = ''
    df_region_period['Dest_zone'] = ''
    df_region_period['Units'] = 'minutes'
    df_region_period['Total_Increment'] = ''
    
    # County
    df_cnty = summarize_all_combinations(final_trips, groupby_columns=['orig_county', 'dest_county', 'Period'], 
                                           summary_column=summary_col)

    df_cnty = df_cnty.rename(columns={ 
                                      'orig_county' : 'Origin_zone',
                                      'dest_county' : 'Dest_zone'})
    df_cnty = df_cnty[['Origin_zone', 'Dest_zone',  'Period', 'Value']]

    df_cnty['Concept_ID'] = concept_id
    df_cnty['Metric_ID'] = metric_num
    df_cnty['Metric_name'] = 'Travel time savings'
    df_cnty['Submetric'] =  metric_num + '.2'
    df_cnty['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination county'
    df_cnty['Population'] = 'Whole Population'
    df_cnty['Geography'] = 'County'
    df_cnty['Zone_ID'] = ''
    df_cnty['Income'] = ''
    df_cnty['Mode'] = ''
    df_cnty['Purpose'] = ''
    df_cnty['Units'] = 'minutes'
    df_cnty['Total_Increment'] = ''
    
    # RDM
    df_rdm = summarize_all_combinations(final_trips, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period'], 
                                           summary_column=summary_col)

    df_rdm = df_rdm.rename(columns={ 
                                    'orig_rdm_zones' : 'Origin_zone',
                                    'dest_rdm_zones' : 'Dest_zone'})

    df_rdm = df_rdm[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_rdm['Concept_ID'] = concept_id
    df_rdm['Metric_ID'] = metric_num
    df_rdm['Metric_name'] = 'Travel time savings'
    df_rdm['Submetric'] =  metric_num + '.3'
    df_rdm['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination RDM zone'
    df_rdm['Population'] = 'Whole Population'
    df_rdm['Geography'] = 'RDM'
    df_rdm['Zone_ID'] = ''
    df_rdm['Income'] = ''
    df_rdm['Mode'] = ''
    df_rdm['Purpose'] = ''
    df_rdm['Units'] = 'minutes'
    df_rdm['Total_Increment'] = ''
    
    # Super Districts
    df_sd = summarize_all_combinations(final_trips, groupby_columns=['orig_super_dist', 'dest_super_dist', 'Period'], 
                                           summary_column=summary_col)

    df_sd = df_sd.rename(columns={ 
                                  'orig_super_dist' : 'Origin_zone',
                                  'dest_super_dist' : 'Dest_zone'})
    df_sd = df_sd[['Origin_zone', 'Dest_zone', 'Period', 'Value']]

    df_sd['Concept_ID'] = concept_id
    df_sd['Metric_ID'] = metric_num
    df_sd['Metric_name'] = 'Travel time savings'
    df_sd['Submetric'] =  metric_num + '.4'
    df_sd['Description'] = 'Travel time savings for new and existing users by primary mode in origin and destination super district'
    df_sd['Population'] = 'Whole Population'
    df_sd['Geography'] = 'Super district'
    df_sd['Zone_ID'] = ''
    df_sd['Income'] = ''
    df_sd['Mode'] = ''
    df_sd['Purpose'] = ''
    df_sd['Units'] = 'trips'
    df_sd['Total_Increment'] = ''
    
    # Prioirty Population
    final_trips['pp_wtd_benefit'] = final_trips[summary_col] * final_trips['pp_share']/100
    df_pp = summarize_all_combinations(final_trips, groupby_columns=['Period'], summary_column='pp_wtd_benefit')
    df_pp = df_pp[['Period', 'Value']]

    df_pp['Concept_ID'] = concept_id
    df_pp['Metric_ID'] = metric_num
    df_pp['Metric_name'] = 'Travel time savings'
    df_pp['Submetric'] =  metric_num + '.5'
    df_pp['Description'] = 'Travel time savings for new and existing users by primary mode'
    df_pp['Population'] = 'Prioirty population'
    df_pp['Geography'] = 'Region'
    df_pp['Zone_ID'] = ''
    df_pp['Origin_zone'] = ''
    df_pp['Dest_zone'] = ''
    df_pp['Income'] = ''
    df_pp['Mode'] = ''
    df_pp['Purpose'] = ''
    df_pp['Units'] = 'trips'
    df_pp['Total_Increment'] = ''
    
    all_dfs = [df_region_period, df_cnty, df_rdm, df_sd, df_pp]

    for dfs in all_dfs:
        metric_name = filename_verbose #'_travel_time_auto_savings_'
        dfs = dfs.reset_index(drop=True)
        dfs = dfs[perf_measure_columns]
        file_name = dfs['Submetric'][0]
        geography = '_' + dfs['Geography'][0].replace(' ', '_')
        dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
        print(len(dfs), file_name, dfs['Metric_name'][0])

    combined_df = pd.concat([df_region_period, df_cnty, df_rdm, df_sd, df_pp]).reset_index(drop=True)
    combined_df.to_csv(_join(summary_dir,  metric_num + filename_verbose + concept_id + '_region' +filename_extension + '.csv'), index=None)

In [58]:
create_summaries(final_trips,'ls_benefit_transit', '_travel_time_savings_transit_', 'E1.1', filename_extension, mode_numbers=[6,7,8])

6 E1.1.1 Travel time savings
600 E1.1.2 Travel time savings
280958 E1.1.3 Travel time savings
7000 E1.1.4 Travel time savings
6 E1.1.5 Travel time savings


In [59]:
create_summaries(final_trips,'ls_benefit_auto', '_travel_time_savings_auto_', 'E1.2', filename_extension,  mode_numbers=[1,2,3])

6 E1.2.1 Travel time savings
600 E1.2.2 Travel time savings
280958 E1.2.3 Travel time savings
7000 E1.2.4 Travel time savings
6 E1.2.5 Travel time savings


In [60]:
create_summaries(final_trips, 'ls_benefit_raidehail', '_travel_time_savings_ridehail_', 'E1.3', filename_extension, mode_numbers=[9])

6 E1.3.1 Travel time savings
600 E1.3.2 Travel time savings
280958 E1.3.3 Travel time savings
7000 E1.3.4 Travel time savings
6 E1.3.5 Travel time savings


In [61]:
create_summaries(final_trips, 'ls_benefit_nm', '_travel_time_savings_non-motorized', 'E1.4', filename_extension, mode_numbers=[4,5])

6 E1.4.1 Travel time savings
600 E1.4.2 Travel time savings
280958 E1.4.3 Travel time savings
7000 E1.4.4 Travel time savings
6 E1.4.5 Travel time savings


## Effective Density Calculations

In [56]:
common_dir = params['common_dir']
decay_param_goods = 1.8
decay_param_services = 1.9

In [57]:
emp_data = pd.read_csv(_join(common_dir, 'employments_link21_15categories.csv'))

In [62]:
#emp_data['sector'] =  emp_data['link21'].map(emp_dict)
emp_data = emp_data.groupby(['TAZ'])['jobs'].sum().reset_index()
#emp_data = pd.pivot(emp_data, index='TAZ', columns='sector' , values='jobs').reset_index()

In [63]:
od_logsums = final_trips.groupby(['orig_taz', 'dest_taz'])['allmode_ls'].mean().reset_index()

In [64]:
od_logsums = pd.merge(od_logsums, emp_data, left_on='dest_taz', right_on='TAZ', how='left')
od_logsums['jobs'] = od_logsums['jobs'].fillna(0)

In [65]:
od_logsums['alpha_goods'] = decay_param_goods
od_logsums['alpha_services'] = decay_param_services

od_logsums['effective_density_good'] = od_logsums['jobs'] / (od_logsums['alpha_goods'] * od_logsums['allmode_ls'])
od_logsums['effective_density_services'] = od_logsums['jobs'] / (od_logsums['alpha_services'] * od_logsums['allmode_ls'])

In [66]:
od_logsums_orig = od_logsums.groupby(['orig_taz'])['effective_density_good', 'effective_density_services'].sum().reset_index()

In [67]:
sector_column = ['effective_density_good', 'effective_density_services']
sector_column = 'effective_density_good'

In [68]:
od_logsums_orig

Unnamed: 0,orig_taz,effective_density_good,effective_density_services
0,1,1.381605e+06,1.308889e+06
1,2,1.510187e+06,1.430704e+06
2,3,2.045383e+06,1.937731e+06
3,4,1.189731e+06,1.127114e+06
4,5,1.817002e+06,1.721370e+06
...,...,...,...
3316,3328,1.685454e+06,1.596746e+06
3317,3329,1.059997e+06,1.004208e+06
3318,3330,9.629181e+05,9.122382e+05
3319,3331,1.079643e+06,1.022819e+06


In [72]:
def effective_density_summary(concept_id, od_logsums_orig, metric_num, sector_column, verbose, filename_extension, filename_verbose):
    df_region_ed = od_logsums_orig[['orig_taz', sector_column]]
    df_region_ed.columns = ['Origin_zone', 'Value']

    df_region_ed['Concept_ID'] = concept_id
    df_region_ed['Metric_ID'] = metric_num
    df_region_ed['Metric_name'] = 'Effective density'
    df_region_ed['Submetric'] = metric_num 
    df_region_ed['Description'] = 'Effective density for ' + verbose + ' in origin zone'
    df_region_ed['Population'] = 'Whole Population'
    df_region_ed['Geography'] = 'Region'
    df_region_ed['Zone_ID'] = ''
    df_region_ed['Income'] = ''
    df_region_ed['Mode'] = ''
    df_region_ed['Purpose'] = ''
    #df_region_ed['Origin_zone'] = '
    df_region_ed['Dest_zone'] = ''
    df_region_ed['Period'] = ''
    df_region_ed['Units'] = ''
    df_region_ed['Total_Increment'] = ''
    
    df_region_ed = df_region_ed[perf_measure_columns]
    df_region_ed.to_csv(_join(summary_dir,  metric_num + filename_verbose + \
                              concept_id + '_region' +filename_extension + '.csv'), index=None)

In [73]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.1', 
                          'effective_density_good', 'goods producing industries', 
                          filename_extension, '_effective_density_goods_producing_industries_')

In [74]:
effective_density_summary(concept_id , od_logsums_orig, 'E1.5.2', 
                          'effective_density_services', 'services producing industries', 
                          filename_extension, '_effective_density_services_producing_industries_')