In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
pd.options.display.float_format = '{:,.3f}'.format

In [7]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
ctramp_dir = params['ctramp_dir']
model_outputs_dir = params['model_dir']
summary_dir = params['summary_dir']
concept_id = params['concept_id']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
model_year = params['model_year']
filename_extension = params['filename_extension']
hwy_skims_dir = _join(model_outputs_dir, r'skims\highway' )
iteration = params['iteration']

skims_dir = _join(model_outputs_dir, r'skims')

In [3]:
Path(summary_dir).mkdir(parents=True, exist_ok=True)
Path(preprocess_dir).mkdir(parents=True, exist_ok=True)

In [4]:
purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

### Calculate the taxi wait time for each origin zone

In [5]:
taz = pd.read_csv(_join(ctramp_dir, 'landuse', 'tazData_' + str(model_year) + '.csv'))
taz['popEmpSqMile'] = (taz['TOTPOP'] + taz['TOTEMP']) / (taz['TOTACRE'] * 0.0015625)

In [6]:
%%time
taz = taz[['ZONE', 'popEmpSqMile']]

# TNC 
#TNC_single_waitTime_mean =  10.3,8.5,8.4,6.3,3.0
#TNC_single_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#TNC_shared_waitTime_mean =  15.0,15.0,11.0,8.0,5.0
#TNC_shared_waitTime_sd =     4.1,4.1,4.1,4.1,2.0

#Taxi_waitTime_mean = 26.5,17.3,13.3,9.5,5.5
#Taxi_waitTime_sd =    6.4,6.4,6.4,6.4,6.4

#WaitTimeDistribution_EndPopEmpPerSqMi = 500,2000,5000,15000,9999999999

#TO DO: Ask John which wait time to use
taz['density_group'] = pd.cut(taz['popEmpSqMile'], bins= [-1, 500,2000,5000,15000,9999999999], 
                              labels=[10.3,8.5,8.4,6.3,3.0], ordered=False)
#taz['density_group'] = taz['density_group'].fillna(0)
taz['density_group'] =taz['density_group'].astype("int64")

taz = taz.sort_values('ZONE')
taxi_wait_time = np.repeat(taz['density_group'].values, len(taz)).reshape(len(taz), len(taz))

Wall time: 59 ms


### Load all the data from Skims 

In [44]:
%%time
# The data tab of the UEC file lists all the matrix cores and location an matrix files of skims
# 1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

# extract the file names, matrix cores 
matrix_df = pd.read_excel(_join(params['common_dir'], r"TripModeChoice.xlsx"), sheet_name='data')
matrix_df = matrix_df.iloc[9:]
matrix_df.columns = ['no', 'token', 'format', 'file','matrix', 'group', 'index']
#matrix_df[1:5]

# pre-processing
matrix_df['matrix_files'] = matrix_df['file'].str.replace('skims/', '')
matrix_df['path'] = 'skims'
matrix_df.loc[matrix_df['matrix_files'].str.contains('nonmot')==True, 'path'] = 'active'
matrix_df.loc[matrix_df['matrix_files'].str.contains('trnskm')==True, 'path'] = 'transit'
matrix_df.loc[matrix_df['matrix_files'].str.contains('hwyskm')==True, 'path'] = 'highway'
#matrix_df[1:5]

# Iterate over the DataFrame rows
for _, row in matrix_df.iterrows():
    variable_name = row['token']
    file_path = row['path']
    filename = row['matrix_files']
    matrix_cr = row['matrix']
    
    # Extract the variable name and index (if present)
    if '[' in variable_name:
        name_start = variable_name.index('[')
        name_end = variable_name.index(']')
        index = int(variable_name[name_start+1:name_end])
        variable_name = variable_name[:name_start]
    else:
        index=None
    
    # Read the file using numpy.load() and assign it to the variable with the specified index
    file = omx.open_file(_join(skims_dir, file_path, filename))
    file_contents = np.array(file[matrix_cr])
    print(variable_name,index, _join(skims_dir, file_path, filename), file_contents.sum(), file_contents.min(), file_contents.max())
    if '[' in row['token']:
        if variable_name in locals() and isinstance(locals()[variable_name], np.ndarray):
            arr = locals()[variable_name]
            if index >= len(arr):
                # Resize the array if the index is out of bounds
                new_arr = np.resize(arr, index + 1)
                new_arr[index] = file_contents
                locals()[variable_name] = new_arr
            else:
                arr[index] = file_contents
        else:
            arr = np.empty(index + 1, dtype=object)
            arr[index] = file_contents
            locals()[variable_name] = arr
    else:
        locals()[variable_name] = file_contents

DISTWALK None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\active\nonmotskm.omx 10796411595951.248 0.03142102696417673 1000000.0
DISTBIKE None C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\active\nonmotskm.omx 8910510616075.47 0.018911417199612486 1000000.0
SOV_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13965190000.0 0.11402832 1000000.0
SOV_DIST 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13847476000.0 0.035375483 1000000.0
SOV_BTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 800814500.0 0.0 472.0
SOV_VTOLL 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 740758100.0 0.0 334.74524
HOV2_TIME 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\highway\hwyskmEA.omx 13957872000.0 0.11402832 1000000.0
HOV2_DIST 1 C:\MTC_tmpy\TM

WLK_TRN_WLK_WAIT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 15679600000.0 0.0 13121.753
WLK_TRN_WLK_TOTIVT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 72267555000.0 0.0 35736.957
WLK_TRN_WLK_CROWD 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 460263070.0 0.0 1651.4585
WLK_TRN_WLK_IVT_LOC 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 14543465000.0 0.0 21880.174
WLK_TRN_WLK_IVT_EXP 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 13815339000.0 0.0 27026.084
WLK_TRN_WLK_IVT_LRT 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\transit\trnskmEA_WLK_TRN_WLK.omx 2572853500.0 0.0 8251.378
WLK_TRN_WLK_IVT_FRY 1 C:\MTC_tmpy\TM2_2050Baseline_R2_Run4\tm2py\examples\Link21_3332\skims\

KeyboardInterrupt: 

In [47]:
WLK_TRN_WLK_CROWD[1]

array([[0.0000000e+00, 1.4241566e+02, 7.5123985e+01, ..., 6.0224365e+01,
        6.0224365e+01, 6.0224365e+01],
       [6.0029912e-01, 0.0000000e+00, 9.7243439e+01, ..., 2.8183130e+02,
        2.8183130e+02, 2.8183130e+02],
       [9.3890438e+00, 9.5710107e+02, 0.0000000e+00, ..., 7.3917213e+01,
        7.3917213e+01, 7.3917213e+01],
       ...,
       [9.2909416e+01, 1.1827644e+03, 2.1985512e+02, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [9.2909416e+01, 1.1827644e+03, 2.1985512e+02, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [9.2909416e+01, 1.1827644e+03, 2.1985512e+02, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00]], dtype=float32)

In [8]:
# change th 1000000.0 values in DISTWALK and DISTBIKE to 0
DISTWALK = np.where(DISTWALK == 1000000.0, 0, DISTWALK)
DISTBIKE = np.where(DISTBIKE == 1000000.0, 0, DISTBIKE)

PNR_TRN_WLK_DTIM[1] =  np.where(PNR_TRN_WLK_DTIM[1] < 0, 0, PNR_TRN_WLK_DTIM[1])
PNR_TRN_WLK_DTIM[2] =  np.where(PNR_TRN_WLK_DTIM[2] < 0, 0, PNR_TRN_WLK_DTIM[2])
PNR_TRN_WLK_DTIM[3] =  np.where(PNR_TRN_WLK_DTIM[3] < 0, 0, PNR_TRN_WLK_DTIM[3])
PNR_TRN_WLK_DTIM[4] =  np.where(PNR_TRN_WLK_DTIM[4] < 0, 0, PNR_TRN_WLK_DTIM[4])
PNR_TRN_WLK_DTIM[5] =  np.where(PNR_TRN_WLK_DTIM[5] < 0, 0, PNR_TRN_WLK_DTIM[5])

KNR_TRN_WLK_DTIM[1] =  np.where(KNR_TRN_WLK_DTIM[1] < 0, 0, KNR_TRN_WLK_DTIM[1])
KNR_TRN_WLK_DTIM[2] =  np.where(KNR_TRN_WLK_DTIM[2] < 0, 0, KNR_TRN_WLK_DTIM[2])
KNR_TRN_WLK_DTIM[3] =  np.where(KNR_TRN_WLK_DTIM[3] < 0, 0, KNR_TRN_WLK_DTIM[3])
KNR_TRN_WLK_DTIM[4] =  np.where(KNR_TRN_WLK_DTIM[4] < 0, 0, KNR_TRN_WLK_DTIM[4])
KNR_TRN_WLK_DTIM[5] =  np.where(KNR_TRN_WLK_DTIM[5] < 0, 0, KNR_TRN_WLK_DTIM[5])

WLK_TRN_PNR_DTIM[1] =  np.where(WLK_TRN_PNR_DTIM[1] < 0, 0, WLK_TRN_PNR_DTIM[1])
WLK_TRN_PNR_DTIM[2] =  np.where(WLK_TRN_PNR_DTIM[2] < 0, 0, WLK_TRN_PNR_DTIM[2])
WLK_TRN_PNR_DTIM[3] =  np.where(WLK_TRN_PNR_DTIM[3] < 0, 0, WLK_TRN_PNR_DTIM[3])
WLK_TRN_PNR_DTIM[4] =  np.where(WLK_TRN_PNR_DTIM[4] < 0, 0, WLK_TRN_PNR_DTIM[4])
WLK_TRN_PNR_DTIM[5] =  np.where(WLK_TRN_PNR_DTIM[5] < 0, 0, WLK_TRN_PNR_DTIM[5])

WLK_TRN_KNR_DTIM[1] =  np.where(WLK_TRN_KNR_DTIM[1] < 0, 0, WLK_TRN_KNR_DTIM[1])
WLK_TRN_KNR_DTIM[2] =  np.where(WLK_TRN_KNR_DTIM[2] < 0, 0, WLK_TRN_KNR_DTIM[2])
WLK_TRN_KNR_DTIM[3] =  np.where(WLK_TRN_KNR_DTIM[3] < 0, 0, WLK_TRN_KNR_DTIM[3])
WLK_TRN_KNR_DTIM[4] =  np.where(WLK_TRN_KNR_DTIM[4] < 0, 0, WLK_TRN_KNR_DTIM[4])
WLK_TRN_KNR_DTIM[5] =  np.where(WLK_TRN_KNR_DTIM[5] < 0, 0, WLK_TRN_KNR_DTIM[5])

In [9]:
# randomly check few matrix cores
#PNR_TRN_WLK_DDIST[4].sum()
#PNR_TRN_WLK_DDIST[2].sum()

In [10]:
# use the PM peak crowding and iwait variables, transpose them, and use them for the AM.
PNR_TRN_WLK_CROWD[2] = PNR_TRN_WLK_CROWD[4].T
PNR_TRN_WLK_IWAIT[2] = PNR_TRN_WLK_IWAIT[4].T

In [11]:
# ct ramp has params.properties which has certain parameter values used in the utility equations. 
# Following function extracts these values.


def extract_property_values(file_path, variables):
    property_values = {}
    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith('#'):
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()
                if key in variables:
                    property_values[key] = value
    return property_values

In [12]:
%%time
for purp in purpose:
    #print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    # Removing NAs
    uec_purp_params_prop = uec_purp.loc[~uec_purp['Token'].isna()]
    # extract the parameters that have % in in their names, clean up-remove % and replace . with _
    uec_purp_params_prop = uec_purp_params_prop.loc[(uec_purp_params_prop['Formula for variable'].str.contains('%')==True)]
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace('%', '') 
    uec_purp_params_prop['Formula for variable'] = uec_purp_params_prop['Formula for variable'].str.replace(".", "_")
    # read parameters file
    file_path = _join(ctramp_dir, 'input', 'params.properties')
    # extract list of parameters
    prop_variables = list(uec_purp_params_prop['Formula for variable'])
    prop_variables_tokens = list(uec_purp_params_prop['Token'])
    prop_variables = [x.replace('_', '.') for x in prop_variables]

    values = extract_property_values(file_path, prop_variables)
    
    # Create a dictionary to store the extracted values
    extracted_values = {}

    # Assign the extracted values to the dictionary
    for variable, value in values.items():
        extracted_values[variable] = value

    # Print the values from the extracted_values dictionary 
    for variable, value in extracted_values.items():
        #print(f'{variable}: {value}')
        exec(f'{variable.replace(".", "_")} = {value}')
    
    
    # Assign the values to tokens
    # example costInitialTaxi = %taxi.baseFare%
    for _, row in uec_purp_params_prop.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
            #print(f"Variable '{variable_name}' is defined.")
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue

    
    uec_purp_params = uec_purp.loc[~uec_purp['Formula for variable'].isna()]
    uec_purp_params = uec_purp_params.loc[~uec_purp_params['Token'].isna()]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('if')==True)]
    uec_purp_params = uec_purp_params.loc[~(uec_purp_params['Formula for variable'].str.contains('%')==True)]

    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].astype(str)
    uec_purp_params['Formula for variable'] = uec_purp_params['Formula for variable'].str.replace('@', '')

    key_column = 'Token'
    value_column = 'Formula for variable'

    # Create dictionary from selected columns
    data_dict = {}

    for _, row in uec_purp_params.iterrows():
        key = row[key_column]
        value = row[value_column]

        # Handle values that are strings
        if isinstance(value, str):
            try:
                value = int(value)
                data_dict[key] = value
            except ValueError:
                try:
                    value = float(value)
                    data_dict[key] = value
                except ValueError:
                    pass

    #get all the parameters
    variables = data_dict

    for _, row in uec_purp_params.iterrows():
        variable_name = row['Token']
        expression = row['Formula for variable']

        # Evaluate the expression and store the result in the local environment
        try:
            # Evaluate the expression and store the result in the local environment
            if expression in locals() and isinstance(locals()[expression], np.ndarray):
                value = locals()[expression]
            else:
                value = eval(expression)

            exec(f'{variable_name} = value')
        except NameError:
            #print(f"Variable '{variable_name}' is not defined.")
            continue
    
    #break
    
    int_zone = 3332
    da_util = np.empty((5, int_zone, int_zone))
    sr2_util = np.empty((5, int_zone, int_zone))
    sr3_util = np.empty((5, int_zone, int_zone))
    wlk_util =  np.empty((5, int_zone, int_zone))
    bike_util = np.empty((5, int_zone, int_zone))
    wlk_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_pnr_util = np.empty((5, int_zone, int_zone))
    pnr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    wlk_trn_knr_util = np.empty((5, int_zone, int_zone))
    knr_trn_wlk_util = np.empty((5, int_zone, int_zone))
    taxi_util = np.empty((5, int_zone, int_zone))
    
    for tripPeriod in time_period:
        #for trip_mode in oth_modes:
        #uec_purp_mode = uec_purp_df.loc[uec_purp_df['Description'].str.contains(trip_mode)==True]
        #uec_purp_mode['Formula for variable'] = uec_purp_mode['Formula for variable'].str.replace('tripPeriod', str(period))
        #uec_purp_mode['formula_calculation'] = 
        util = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'),'w')

        #Drive alone
        util['DA'] = c_ivt*SOV_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'DA', " ", np.array(util['DA']).min(), np.array(util['DA']).max())

        #Shared ride 2
        util['SR2'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR2', " ", np.array(util['SR2']).min(), np.array(util['SR2']).max())

        #Shared ride 3
        util['SR3'] = c_ivt*HOV3_TIME[tripPeriod][:int_zone, :int_zone]
        print(tripPeriod, purp, 'SR3', " ", np.array(util['SR3']).min(), np.array(util['SR3']).max())

        # Walk 
        util['WALK'] = (walk_dist<=1)* (c_walkTimeShort * np.minimum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) + \
                       (walk_dist>1)* (c_walkTimeLong * np.maximum(walk_dist * 60 / walkSpeed, walkThresh * 60 / walkSpeed)) 
        print(tripPeriod, purp, 'WALK', " ", np.array(util['WALK']).min(), np.array(util['WALK']).max())
        
        #Bike
        util['BIKE'] = (bike_dist<=6)*(c_bikeTimeShort* np.minimum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed)) + \
                       (bike_dist>6)*(c_bikeTimeLong* np.maximum(bike_dist*60/bikeSpeed, bikeThresh*60/bikeSpeed))
        print(tripPeriod, purp, 'BIKE', " ", np.array(util['BIKE']).min(), np.array(util['BIKE']).max())
        
        
        #Walk transit Walk
        util['WLK_TRN_WLK'] =  c_ivt*WLK_TRN_WLK_IVT_LOC[tripPeriod]/100 + \
                            c_ivt_exp*WLK_TRN_WLK_IVT_EXP[tripPeriod]/100 + \
                            c_ivt_lrt*WLK_TRN_WLK_IVT_LRT[tripPeriod]/100 + \
                            c_ivt_ferry*WLK_TRN_WLK_IVT_FRY[tripPeriod]/100 + \
                            c_ivt_hvy*WLK_TRN_WLK_IVT_HVY[tripPeriod]/100 + \
                            c_ivt_com*WLK_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_wlk * np.maximum(WLK_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_WLK_WAUX[tripPeriod]/100
        
        print(tripPeriod, purp, 'WLK_TRN_WLK', " ", np.array(util['WLK_TRN_WLK']).min(), np.array(util['WLK_TRN_WLK']).max())
        
        # Walk Transit PNR - Inbound
        util['WLK_TRN_PNR'] =  c_ivt*WLK_TRN_PNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_PNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_PNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_PNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_PNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_PNR_XWAIT[tripPeriod]/100 + \
                            c_dtim*WLK_TRN_PNR_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_PNR_BOARDS[tripPeriod]-1,0) + \
                            c_waux*WLK_TRN_PNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_PNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_PNR', " ", np.array(util['WLK_TRN_PNR']).min(), np.array(util['WLK_TRN_PNR']).max())

        # PNR transit Walk - Outbound
        util['PNR_TRN_WLK'] =  c_ivt*PNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*PNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*PNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(PNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(PNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*PNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_dtim*PNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(PNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_waux*PNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(PNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'PNR_TRN_WLK', " ", np.array(util['PNR_TRN_WLK']).min(), np.array(util['PNR_TRN_WLK']).max())

        # Walk Transit KNR - Inbound
        util['WLK_TRN_KNR'] = c_ivt*WLK_TRN_KNR_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*WLK_TRN_KNR_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*WLK_TRN_KNR_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(WLK_TRN_KNR_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(WLK_TRN_KNR_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*WLK_TRN_KNR_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(WLK_TRN_KNR_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*WLK_TRN_KNR_DTIM[tripPeriod]/100 + \
                            c_waux*WLK_TRN_KNR_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(WLK_TRN_KNR_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'WLK_TRN_KNR', " ", np.array(util['WLK_TRN_KNR']).min(), np.array(util['WLK_TRN_KNR']).max())

        # KNR Transit Walk - Outbound
        util['KNR_TRN_WLK'] = c_ivt*KNR_TRN_WLK_TOTIVT[tripPeriod]/100 + \
                            (c_ivt_com-c_ivt)*KNR_TRN_WLK_IVT_COM[tripPeriod]/100 + \
                            c_ivt_trn_crwd*KNR_TRN_WLK_CROWD[tripPeriod]/100 + \
                            c_shortiWait*np.minimum(KNR_TRN_WLK_IWAIT[tripPeriod]/100,waitThresh) + \
                            c_longiWait*np.maximum(KNR_TRN_WLK_IWAIT[tripPeriod]/100-waitThresh,0) + \
                            c_xwait*KNR_TRN_WLK_XWAIT[tripPeriod]/100 + \
                            c_xfers_drv*np.maximum(KNR_TRN_WLK_BOARDS[tripPeriod]-1,0) + \
                            c_dtim*KNR_TRN_WLK_DTIM[tripPeriod]/100 + \
                            c_waux*KNR_TRN_WLK_WAUX[tripPeriod]/100 + \
                            c_dacc_ratio*(KNR_TRN_WLK_DDIST[tripPeriod]/100/SOV_DIST[tripPeriod][:int_zone, :int_zone])
        print(tripPeriod, purp, 'KNR_TRN_WLK', " ", np.array(util['KNR_TRN_WLK']).min(), np.array(util['KNR_TRN_WLK']).max())

        # taxi
        util['RIDEHAIL'] = c_ivt*HOV2_TIME[tripPeriod][:int_zone, :int_zone]  + c_ivt*1.5*taxi_wait_time
        print(tripPeriod, purp, 'RIDEHAIL', " ", np.array(util['RIDEHAIL']).min(), np.array(util['RIDEHAIL']).max())
        
    
        util.close()

1 Work DA   -22000.0 -0.002508623
1 Work SR2   -22000.0 -0.002508623
1 Work SR3   -22000.0 -0.002508623
1 Work WALK   -19.223921100079313 -0.0
1 Work BIKE   -47.724191393766105 -0.0
1 Work WLK_TRN_WLK   -54.164055 -0.0
1 Work WLK_TRN_PNR   -26.305435 0.0
1 Work PNR_TRN_WLK   -38.9606 0.0
1 Work WLK_TRN_KNR   -26.216692 0.0
1 Work KNR_TRN_WLK   -38.19036 0.0
1 Work RIDEHAIL   -22000.33 -0.10150862305983901
2 Work DA   -22000.0 -0.0025205028
2 Work SR2   -22000.0 -0.0025205028
2 Work SR3   -22000.0 -0.0025205028
2 Work WALK   -19.223921100079313 -0.0
2 Work BIKE   -47.724191393766105 -0.0
2 Work WLK_TRN_WLK   -34.25541 -0.0
2 Work WLK_TRN_PNR   -29.575077 0.0
2 Work PNR_TRN_WLK   -18.515484 0.0
2 Work WLK_TRN_KNR   -29.33244 0.0
2 Work KNR_TRN_WLK   -15.388551 0.0
2 Work RIDEHAIL   -22000.33 -0.10152050277777017
3 Work DA   -22000.0 -0.002536045
3 Work SR2   -22000.0 -0.002536045
3 Work SR3   -22000.0 -0.002536045
3 Work WALK   -19.223921100079313 -0.0
3 Work BIKE   -47.724191393766105 -

4 Escort WLK_TRN_KNR   -35.97722 0.0
4 Escort KNR_TRN_WLK   -23.027073 0.0
4 Escort RIDEHAIL   -27900.4185 -0.12874310859777033
5 Escort DA   -27900.0 -0.003183112
5 Escort SR2   -27900.0 -0.003183112
5 Escort SR3   -27900.0 -0.003183112
5 Escort WALK   -24.379427213282405 -0.0
5 Escort BIKE   -60.52295181300339 -0.0
5 Escort WLK_TRN_WLK   -58.25675 -0.0
5 Escort WLK_TRN_PNR   -41.840477 0.0
5 Escort PNR_TRN_WLK   -38.55624 0.0
5 Escort WLK_TRN_KNR   -37.488884 0.0
5 Escort KNR_TRN_WLK   -33.90611 0.0
5 Escort RIDEHAIL   -27900.4185 -0.12873311201408505
1 Shopping DA   -27900.0 -0.00318139
1 Shopping SR2   -27900.0 -0.00318139
1 Shopping SR3   -27900.0 -0.00318139
1 Shopping WALK   -24.379427213282405 -0.0
1 Shopping BIKE   -60.52295181300339 -0.0
1 Shopping WLK_TRN_WLK   -67.79005 -0.0
1 Shopping WLK_TRN_PNR   -33.360073 0.0
1 Shopping PNR_TRN_WLK   -49.409126 0.0
1 Shopping WLK_TRN_KNR   -33.247528 0.0
1 Shopping KNR_TRN_WLK   -48.432316 0.0
1 Shopping RIDEHAIL   -27900.4185 -0.12873

3 Social SR3   -27900.0 -0.003216166
3 Social WALK   -24.379427213282405 -0.0
3 Social BIKE   -60.52295181300339 -0.0
3 Social WLK_TRN_WLK   -55.00296 -0.0
3 Social WLK_TRN_PNR   -35.169228 0.0
3 Social PNR_TRN_WLK   -35.052326 0.0
3 Social WLK_TRN_KNR   -34.6535 0.0
3 Social KNR_TRN_WLK   -31.192188 0.0
3 Social RIDEHAIL   -27900.4185 -0.12876616604924201
4 Social DA   -27900.0 -0.0031931086
4 Social SR2   -27900.0 -0.0031931086
4 Social SR3   -27900.0 -0.0031931086
4 Social WALK   -24.379427213282405 -0.0
4 Social BIKE   -60.52295181300339 -0.0
4 Social WLK_TRN_WLK   -38.516342 -0.0
4 Social WLK_TRN_PNR   -36.370235 0.0
4 Social PNR_TRN_WLK   -24.250967 0.0
4 Social WLK_TRN_KNR   -35.97722 0.0
4 Social KNR_TRN_WLK   -23.027073 0.0
4 Social RIDEHAIL   -27900.4185 -0.12874310859777033
5 Social DA   -27900.0 -0.003183112
5 Social SR2   -27900.0 -0.003183112
5 Social SR3   -27900.0 -0.003183112
5 Social WALK   -24.379427213282405 -0.0
5 Social BIKE   -60.52295181300339 -0.0
5 Social WLK_

In [13]:
# util.close()

In [14]:
# add mappings from time period and purpose
df_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
len(df_trips)

35981987

In [15]:
# inbound trips get orig purpose, outbound trips get dest purpose
df_trips['util_purpose'] = np.where(df_trips['inbound']==1, df_trips['orig_purpose'], df_trips['dest_purpose'])

In [16]:
#df_trips['util_purpose'].value_counts()

In [17]:
purp_dict = { 'work' : 'Work', 
              'shopping' : 'Shopping',
              'escort' : 'Escort', 
              'othdiscr': 'OthDiscr',
              'othmaint': 'OthMaint',
              'school' : 'School', 
              'eatout' : 'EatOut', 
              'atwork' : 'WorkBased', 
              'social' : 'Social',
              'university' : 'University'}

time_period = {1:'EA',2:'AM',3:'MD',4:'PM',5:'EV'} #1 for EA, 2 for AM, 3 for MD, 4 for PM and 5 for EV

purpose = ['Work', 'University', 'School', 'Escort', 'Shopping', 'EatOut', 
           'OthMaint', 'Social', 'OthDiscr', 'WorkBased']

In [18]:
df_trips['util_purpose'] = df_trips['util_purpose'].map(purp_dict)

In [19]:
%%time

import pandas as pd
import itertools

num_zones = 3332

for tripPeriod, value in time_period.items():

    for purp in purpose:
        print(f'Analyzing purpose: {purp} and time period: {value}')
        #df_temp = df_trips.query(f"util_purpose == {purp} and Period == {value.lower()}")
        #df_temp = df_trips.query(f"util_purpose == '{purp}' and Period == '{value.lower}'")
        df_temp = df_trips.loc[(df_trips['util_purpose'] == purp) & (df_trips['Period'] == value.lower())]
        
        # Generate all combinations of orig and dest
        combinations = list(itertools.product(range(1, num_zones + 1), repeat=2))

        # Create the DataFrame with orig and dest columns
        purp_df = pd.DataFrame(combinations, columns=['orig', 'dest'])

        # read utility files
        util_file = omx.open_file(_join(preprocess_dir, f'util_{tripPeriod}_{purp}.omx'))

        for core in util_file.list_matrices():
            print(f'extracting {core} core form utility file')
            mode_core = np.array(util_file[core])
            mode_core = np.where(mode_core == 0, -999, mode_core)
            skm_df = pd.DataFrame(mode_core)
            skm_df = pd.melt(skm_df.reset_index(), id_vars='index', value_vars=skm_df.columns)
            skm_df['index'] = skm_df['index'] + 1
            skm_df['variable'] = skm_df['variable'] + 1
            skm_df.columns = ['orig', 'dest', core]
            purp_df = pd.merge(purp_df, skm_df, on=['orig', 'dest'], how='left')
        
        df_temp = pd.merge(df_temp, purp_df, left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='left')
        
        print(f'writing the trip file for purpose : {purp} and time period: {value}', df_temp.shape)
        df_temp.to_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        
        #break

Analyzing purpose: Work and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Work and time period: EA (305666, 40)
Analyzing purpose: University and time period: EA
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility f

Analyzing purpose: Shopping and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : Shopping and time period: AM (740724, 40)
Analyzing purpose: EatOut and time period: AM
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utili

Analyzing purpose: OthDiscr and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : OthDiscr and time period: MD (1071502, 40)
Analyzing purpose: WorkBased and time period: MD
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form u

extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility file
extracting WLK_TRN_WLK core form utility file
writing the trip file for purpose : School and time period: EV (141579, 40)
Analyzing purpose: Escort and time period: EV
extracting BIKE core form utility file
extracting DA core form utility file
extracting KNR_TRN_WLK core form utility file
extracting PNR_TRN_WLK core form utility file
extracting RIDEHAIL core form utility file
extracting SR2 core form utility file
extracting SR3 core form utility file
extracting WALK core form utility file
extracting WLK_TRN_KNR core form utility file
extracting WLK_TRN_PNR core form utility f

In [100]:
# combine all trips into one file

# read trip files
final_trips = []

for tripPeriod, value in time_period.items():
    for purp in purpose:
        temp = pd.read_parquet(_join(preprocess_dir, f'trip_{tripPeriod}_{purp}.parquet'))
        final_trips.append(temp)
        
final_trips = pd.concat(final_trips)
len(final_trips) 

35981987

In [101]:
# attach auto sufficiency
hh_data = pd.read_csv(_join(ctramp_dir, 'main', 'householdData_'+str(params['iteration'])+'.csv'))

hh_data['auto_suff_category'] = 0
hh_data.loc[hh_data['autos'] < hh_data['workers'], 'auto_suff_category'] = 1 #autoDeficientHh
hh_data.loc[hh_data['autos'] >= hh_data['workers'], 'auto_suff_category'] = 2 #autoSufficientHh
hh_data.loc[hh_data['autos']==0, 'auto_suff_category'] = 0 #zeroAutoHh

hh_data = hh_data[['hh_id', 'auto_suff_category']]

final_trips = pd.merge(final_trips, hh_data, on='hh_id', how='left')

In [102]:
#read ModeChoice.xls file
#!pip install xlrd
asc_df_final = []
for purp in purpose:
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
                   'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    mc_purp = pd.read_excel(_join(params['common_dir'], "ModeChoice.xls"), sheet_name=purp)
    mc_purp = mc_purp.iloc[2:]
    mc_purp.columns = uec_purp_columns # assign column names
    
    asc_df = mc_purp.loc[(mc_purp['Description'].str.contains('Alternative-specific constant')==True) & 
            (mc_purp['Filter'].str.contains('indivTour')==True)]
    
    asc_df = asc_df[['Description', 'Formula for variable']]
    asc_df['util_purpose'] = purp
    
    asc_df[['mode', 'asc_text1', 'asc_text2', 'auto_suff']] = asc_df['Description'].str.split('-', expand=True)
    
    asc_df['auto_suff'] = asc_df['auto_suff'].str.strip()
    asc_df['mode'] = asc_df['mode'].str.strip()
    
    asc_df['auto_suff_category'] = 0
    asc_df.loc[asc_df['auto_suff']=='Zero auto', 'auto_suff_category']  = 0
    asc_df.loc[asc_df['auto_suff']=='Auto deficient', 'auto_suff_category']  = 1
    asc_df.loc[asc_df['auto_suff']=='Auto sufficient', 'auto_suff_category']  = 2
    
    asc_df = asc_df.rename(columns={'Formula for variable': 'asc'})
    asc_df = asc_df[['mode', 'auto_suff_category', 'util_purpose', 'asc']]
    asc_df_final.append(asc_df)
    
asc_df = pd.concat(asc_df_final)

mode_names = {'Walk': 'walk_asc', 'Bike': 'bike_asc', 'Shared ride 2' : 'sr2_asc', 'Shared ride 3+' : 'sr3_asc',
              'Walk to Transit': 'wtw_asc', 'Park and ride Transit': 'PnR_asc', 'Kiss and ride Transit': 'KnR_asc',
              'Taxi': 'rh_asc'}

asc_df['mode'] = asc_df['mode'].map(mode_names)

asc_df = pd.pivot_table(asc_df, index=['auto_suff_category', 'util_purpose'], columns='mode', values='asc')
asc_df = asc_df.reset_index()

In [104]:
final_trips = pd.merge(final_trips, asc_df, on=['auto_suff_category', 'util_purpose'], how='left')

In [105]:
final_trips['SR2'] = final_trips['SR2'] + final_trips['sr2_asc']
final_trips['SR3'] = final_trips['SR3'] + final_trips['sr3_asc']
final_trips['WALK'] = final_trips['WALK'] + final_trips['walk_asc']
final_trips['BIKE'] = final_trips['BIKE'] + final_trips['bike_asc']
final_trips['WLK_TRN_WLK'] = final_trips['WLK_TRN_WLK'] + final_trips['wtw_asc']
final_trips['WLK_TRN_PNR'] = final_trips['WLK_TRN_PNR'] + final_trips['PnR_asc']
final_trips['PNR_TRN_WLK'] = final_trips['PNR_TRN_WLK'] + final_trips['PnR_asc']
final_trips['WLK_TRN_KNR'] = final_trips['WLK_TRN_KNR'] + final_trips['KnR_asc']
final_trips['KNR_TRN_WLK'] = final_trips['KNR_TRN_WLK'] + final_trips['KnR_asc']

### Calculate the logsums

In [106]:
auto_nesting_coef = 0.72
trn_nesting_coef = 0.72
nm_nest_coef = 0.72
ridehail_nest_coef = 1

In [107]:
columns = ['DA', 'SR2', 'SR3', 'WALK', 'BIKE', 'WLK_TRN_WLK', 'PNR_TRN_WLK', 'WLK_TRN_PNR', 'KNR_TRN_WLK', 'WLK_TRN_KNR', 'RIDEHAIL']

In [108]:
for cols in columns:
    print(final_trips[cols].min(), final_trips[cols].max())

-18.330202 -0.002508623
-54.56097119137451 2.36889883569479
-53.367603108161624 2.4646383529573708
-999.1736 6.645414836770833
-1030.35610516 17.251348189202616
-1004.96784 8.739778524411843
-1998.0 3.99876188061706
-1998.0 3.802234181142815
-1998.0 5.9513338254366985
-1998.0 5.945812766400444
-18.322276919555662 -0.10150862305983901


In [110]:
np.exp(-1999)

0.0

In [111]:
# Create logsum
#tiny = 1
final_trips['exp_auto'] = np.exp(final_trips['DA']/auto_nesting_coef) + \
                            np.exp(final_trips['SR2']/auto_nesting_coef) + \
                             np.exp(final_trips['SR3']/auto_nesting_coef)

final_trips['auto_ls'] = np.where(final_trips['exp_auto']>0, auto_nesting_coef * (np.log(final_trips['exp_auto'])),0)

final_trips['exp_trn'] = np.exp(final_trips['WLK_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_PNR']/trn_nesting_coef) + \
                               np.exp(final_trips['PNR_TRN_WLK']/trn_nesting_coef) + \
                               np.exp(final_trips['WLK_TRN_KNR']/trn_nesting_coef) + \
                               np.exp(final_trips['KNR_TRN_WLK']/trn_nesting_coef)

final_trips['trn_ls'] = np.where(final_trips['exp_trn'] > 0, trn_nesting_coef *(np.log(final_trips['exp_trn'])), 0)

final_trips['exp_nm'] = np.exp(final_trips['WALK']/nm_nest_coef) + \
                                  np.exp(final_trips['BIKE']/nm_nest_coef)
    
final_trips['non_mot_ls'] = np.where(final_trips['exp_nm'] > 0, nm_nest_coef * (np.log(final_trips['exp_nm'])), 0)

final_trips['exp_ridehail'] = np.exp(final_trips['RIDEHAIL']/ridehail_nest_coef)
final_trips['ridehail_ls'] = np.where(final_trips['exp_ridehail']>0, ridehail_nest_coef * (np.log(final_trips['exp_ridehail'])),0)

final_trips['allmode_ls'] = np.log(np.exp(final_trips['auto_ls']) + 
                                np.exp(final_trips['trn_ls']) + 
                                np.exp(final_trips['non_mot_ls']) + 
                                np.exp(final_trips['ridehail_ls']))

In [23]:
#final_trips['allmode_ls_adj'].max()

In [112]:
# Create logsum
final_trips['allmode_ls_adj'] = final_trips['allmode_ls']

In [113]:
final_trips['sum_ls'] = np.exp(final_trips['auto_ls']) + np.exp(final_trips['trn_ls']) +  np.exp(final_trips['non_mot_ls']) + np.exp(final_trips['ridehail_ls'])

In [26]:
#final_trips[final_trips['allmode_ls']<0][1:10]

### Get BETA IVT values for each purpose from UEC sheet

In [15]:
# get beta IVT for each purpose
ivt_purp = pd.DataFrame(columns=['util_purpose'])

for purp in purpose:
    print(purp)
    # read the purpose tab from the UEC file. 
    uec_purp_columns = ['No', 'Token', 'Description', 'Filter','Formula for variable', 
               'Index','Alt1', 'Alt2', 'Alt3', 'Alt4', 'Alt5', 'Alt6', 'Alt7', 'Alt8', 'Alt9']
    
    uec_purp = pd.read_excel(_join(params['common_dir'], "TripModeChoice.xlsx"), sheet_name=purp)
    uec_purp = uec_purp.iloc[2:]
    uec_purp.columns = uec_purp_columns # assign column names
    
    ivt = uec_purp.loc[uec_purp['Token']=='c_ivt', 'Formula for variable'].item()
    #ivt_lrt = uec_purp.loc[uec_purp['Token']=='c_ivt_lrt', 'Formula for variable'].item()
    #ivt_ferry = uec_purp.loc[uec_purp['Token']=='c_ivt_ferry', 'Formula for variable'].item()
    #ivt_exp = uec_purp.loc[uec_purp['Token']=='c_ivt_exp', 'Formula for variable'].item()
    #ivt_hvy = uec_purp.loc[uec_purp['Token']=='c_ivt_hvy', 'Formula for variable'].item()
    #ivt_com = uec_purp.loc[uec_purp['Token']=='c_ivt_com', 'Formula for variable'].item()
    
    ivt_purp = ivt_purp.append({'util_purpose': purp, 'b_ivt': ivt #'b_ivt_lrt': ivt_lrt,
                                #'b_ivt_ferry' : ivt_ferry, 'b_ivt_exp': ivt_exp, 
                                #'b_ivt_hvy': ivt_hvy, 'b_ivt_com': ivt_com
                               }, ignore_index=True)

Work
University
School
Escort
Shopping
EatOut
OthMaint
Social
OthDiscr
WorkBased


In [16]:
ivt_purp

Unnamed: 0,util_purpose,b_ivt
0,Work,-0.022
1,University,-0.027
2,School,-0.027
3,Escort,-0.028
4,Shopping,-0.028
5,EatOut,-0.028
6,OthMaint,-0.028
7,Social,-0.028
8,OthDiscr,-0.028
9,WorkBased,-0.028


In [114]:
# logsum benefits auto
final_trips['auto_proportion'] =  np.where(final_trips['auto_ls']!=0, (final_trips['allmode_ls_adj'] * \
                                    (np.exp(final_trips['auto_ls'])/final_trips['sum_ls'])), 0) #* \
                                        #(1/-final_trips['b_ivt'])) #
# logsum benefits transit
final_trips['trn_proportion'] = np.where(final_trips['trn_ls']!=0, (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['trn_ls'])/final_trips['sum_ls'])), 0) #* \
                                            #(1/-final_trips['b_ivt']))  #*

    # logsum benefits ridehail
final_trips['rh_proportion'] = np.where(final_trips['ridehail_ls']!=0, (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['ridehail_ls'])/final_trips['sum_ls'])), 0) #* \
                                            #(1/-final_trips['b_ivt']))  #* final_trips['trips']

# logsum benefits non-motorized
final_trips['nm_proportion'] =  np.where(final_trips['non_mot_ls']!=0, (final_trips['allmode_ls_adj'] * \
                                         (np.exp(final_trips['non_mot_ls'])/final_trips['sum_ls'])), 0) #* \
                                           # (1/-final_trips['b_ivt']))  #* final_trips['trips']

In [116]:
# create a empty dataframe of all OD combinations and time period
import pandas as pd
from itertools import product

numbers_1_to_3332 = range(1, 3333)
combinations_1_to_5 = range(1, 6)
combinations_strings = ['am', 'md', 'pm', 'ev', 'ea']

combinations = list(product(numbers_1_to_3332, numbers_1_to_3332, combinations_strings))

df = pd.DataFrame(combinations, columns=['orig_taz', 'dest_taz', 'Period'])
df.head()

Unnamed: 0,orig_taz,dest_taz,Period
0,1,1,am
1,1,1,md
2,1,1,pm
3,1,1,ev
4,1,1,ea


In [117]:
df_od = final_trips.groupby(['orig_taz', 'dest_taz', 'Period'])['auto_proportion', 'trn_proportion', 
                                                                'rh_proportion', 'nm_proportion',
                                                               'allmode_ls_adj'].sum().reset_index()

In [119]:
df = pd.merge(df, df_od, on=['orig_taz', 'dest_taz', 'Period'], how='left')

In [120]:
all_trips = final_trips.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
df = pd.merge(df, all_trips, on=['orig_taz', 'dest_taz', 'Period'], how='left')

In [121]:
%%time

#filter trips by mode
auto = final_trips.loc[final_trips['trip_mode'].isin([1,2,3])]
trn = final_trips.loc[final_trips['trip_mode'].isin([6,7,8])]
nm = final_trips.loc[final_trips['trip_mode'].isin([4,5])]
rh = final_trips.loc[final_trips['trip_mode'].isin([9])]

#get totals trips for each mode
auto_trip = auto.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
auto_trip = auto_trip.rename(columns={'trips': 'auto_trips'})

trn_trip = trn.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
trn_trip = trn_trip.rename(columns={'trips': 'trn_trips'})

nm_trip = nm.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
nm_trip = nm_trip.rename(columns={'trips': 'nm_trips'})

rh_trip = rh.groupby(['orig_taz', 'dest_taz', 'Period'])['trips'].sum().reset_index()
rh_trip = rh_trip.rename(columns={'trips': 'rh_trips'})

#merge the trips by each model from previous cell
df_trips=pd.merge(df, auto_trip,  on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    trn_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    nm_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left').merge(
    rh_trip, on=['orig_taz', 'dest_taz', 'Period'], how='left')

Wall time: 1min 40s


In [75]:
df_trips.columns

Index(['orig_taz', 'dest_taz', 'Period', 'auto_proportion', 'trn_proportion',
       'rh_proportion', 'nm_proportion', 'auto_trips', 'trn_trips', 'nm_trips',
       'rh_trips'],
      dtype='object')

In [122]:
df_trips.to_parquet(_join(concept_id+'_perc_ls3_trips.parquet'))