In [2]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

In [21]:
# Read the run configuration once; every setting below comes from config.yaml
with open('config.yaml') as config_file:
    params = yaml.safe_load(config_file)

# short aliases for the os.path helpers used throughout the notebook
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# --- directories and run identifiers ---
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# --- lookup tables and category definitions ---
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egr = params['access_egress_modes']
income_categories_bins = params['income_categories_bins']
perf_measure_columns = params['final_columns']

# --- best-path skim settings: file-name suffix and the cores summed for each travel-time measure ---
best_path_skim_extension = params['best_path_skim_extension']
perceived_tt_cores = params['perceived_travel_time']
actual_tt_cores = params['total_travel_time']

In [4]:
# Make sure the summary and pre-process folders exist before anything is written to them
summary_dir = params['summary_dir']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

for out_dir in (summary_dir, preprocess_dir):
    # parents=True builds missing intermediate folders; exist_ok=True makes re-runs harmless
    Path(out_dir).mkdir(parents=True, exist_ok=True)

In [5]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
# Each path component is passed to os.path.join separately so the separator is chosen by
# the operating system instead of being hard-coded as a Windows backslash.
person_file = _join(ctramp_dir, 'main', f'personData_{iteration}.csv')
household_file = _join(ctramp_dir, 'main', f'householdData_{iteration}.csv')

person = pd.read_csv(person_file)

# only the household id, home TAZ and income are read from the household file
hh = pd.read_csv(household_file, usecols=['hh_id', 'taz', 'income'])
hh = hh.rename(columns={'taz': 'home_zone'})

# taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv"))  # columns taz, rdm_zones, super_district, county

# taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx"))  # columns = taz, pp_share

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv"))  # columns = transbay_o, transbay_d

# locations of the demand matrices and skims under the model output folder
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
highway_skims_dir = _join(skims_dir, "highway")

income_categories = params['income_categories_bins']

In [5]:
%%time
#create trips
df_trips = create_trip_roster(ctramp_dir, hh, pp_perc ,transbay_od, geo_cwks, link21_purp_mapping, iteration)

df_trips = df_trips.drop(columns=['person_num', 'tour_id', 'stop_id', 'orig_walk_segment',
                                  'tour_purpose', 'orig_purpose', 'dest_purpose',
                                 'dest_walk_segment', 'parking_taz', 'tour_mode', 'tour_category',
                                 'avAvailable', 'num_participants', 'new_dest_purp',
                                 'new_orig_purp', 'link21_tour_purp', 'link21_orig_purp',
                                 'link21_dest_purp', 'taxiWait', 'singleTNCWait', 'sharedTNCWait'])

df_trips['Period'] = df_trips['depart_hour'].map(time_period_mapping)
df_trips['Mode'] = df_trips['trip_mode'].map(mode_cat_mapping)
df_trips = df_trips.rename(columns={'income_bin' : 'Income'})

df_trips.to_parquet(_join(preprocess_dir, 'trip_roster.parquet'))

NAs in PP Share: 0
Wall time: 9min 55s


In [7]:
# Build the tour roster, trim the columns that are not needed downstream,
# attach the period / mode labels, and cache the result as parquet.
tour_columns_to_drop = [
    'person_num', 'person_type', 'tour_id',
    'tour_category', 'orig_walk_segment', 'dest_walk_segment',
    'atWork_freq', 'num_ob_stops', 'num_ib_stops', 'avAvailable',
    'dcLogsum', 'origTaxiWait', 'destTaxiWait', 'origSingleTNCWait',
    'destSingleTNCWait', 'origSharedTNCWait', 'destSharedTNCWait',
    'tour_composition', 'tour_participants',
]

df_tours = (
    create_tour_roster(ctramp_dir, hh, pp_perc, transbay_od, geo_cwks, iteration)
    .drop(columns=tour_columns_to_drop)
    .assign(
        # tour start hour -> time period, tour mode -> mode category (mappings from config.yaml)
        Period=lambda tours: tours['start_hour'].map(time_period_mapping),
        Mode=lambda tours: tours['tour_mode'].map(mode_cat_mapping),
    )
    .rename(columns={'income_bin': 'Income'})
)

df_tours.to_parquet(_join(preprocess_dir, 'tour_roster.parquet'))

NAs in PP Share: 0


## Process Single Path Skims

In [12]:
# Open one single-path skim (AM period, WLK_TRN_WLK access/egress) to inspect its matrix cores.
# The path is assembled from config.yaml in the same way the best-path loop further down
# builds its file names, instead of hard-coding a machine-specific absolute path.
# NOTE: despite its name, `file_name` holds the open OMX file handle, not a path string.
file_name = omx.open_file(
    _join(params['best_path_skim_dir'], 'am_WLK_TRN_WLK' + best_path_skim_extension + '.omx')
)

In [13]:
# List the names of the matrix cores stored in the skim file opened above
file_name.list_matrices()

['BOARDS',
 'DDIST',
 'DTIME',
 'FARE',
 'IVT',
 'IVTCOM',
 'IVTEXP',
 'IVTFRY',
 'IVTHVY',
 'IVTLOC',
 'IVTLRT',
 'IWAIT',
 'PIVTCOM',
 'PIVTEXP',
 'PIVTFRY',
 'PIVTHVY',
 'PIVTLOC',
 'PIVTLRT',
 'PerceivedTime_BestPath',
 'WACC',
 'WAIT',
 'WAUX',
 'WEGR',
 'XWAIT']

In [20]:
# Show the configured perceived-travel-time cores and access/egress mode combinations
# (both are read from config.yaml in the configuration cell)
perceived_tt_cores, acc_egr

NameError: name 'acc_egr' is not defined

In [22]:
%%time
# find the best path with lowest travel time for each time period

perc_tod_tt = []
all_tod_tt = []
for per in time_periods:
    
    perc_df = []
    tt_df = []
    for acc in acc_egg:
        file_name = _join(params['best_path_skim_dir'], per+'_'+acc+ best_path_skim_extension+'.omx')
        if os.path.exists(file_name):
            print(file_name)
            skim = omx.open_file(file_name)
            
            # empty matrix 
            actl_mat_core = np.zeros(skim.shape())
            percvd_mat_core = np.zeros(skim.shape())
            
            # iterate over all cores to get total travel time
            for core in actual_tt_cores:
                actl_mat_core = actl_mat_core + np.array(skim[core])
                #print(mat_core.sum())
                
            for core in perceived_tt_cores:
                percvd_mat_core = percvd_mat_core + np.array(skim[core])

            df = pd.DataFrame(actl_mat_core)
            df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
            df['index'] = df['index'] + 1
            df['variable'] = df['variable'] + 1
            df.columns = ['orig', 'dest', 'tt']
            df['acc_egr'] = acc
            tt_df.append(df)
            
            df = pd.DataFrame(percvd_mat_core)
            df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
            df['index'] = df['index'] + 1
            df['variable'] = df['variable'] + 1
            df.columns = ['orig', 'dest', 'tt']
            df['acc_egr'] = acc
            perc_df.append(df)
            
        else:
            print(f'{file_name} doesn\'t exist')
    
    if len(tt_df)>0:
        df_temp = pd.concat(tt_df)
        print(acc_egr)
        df_temp = pd.pivot(df_temp, index=['orig', 'dest'], columns = ['acc_egr'], values = 'tt').reset_index()
        df_temp['min_tt'] = df_temp[acc_egr][df_temp[acc_egr] > 0].min(axis=1)
        df_temp = df_temp[['orig', 'dest', 'min_tt']]
        df_temp['tp'] = per
        all_tod_tt.append(df_temp)
        
    if len(perc_df)>0:
        df_temp = pd.concat(perc_df)
        df_temp = pd.pivot(df_temp, index=['orig', 'dest'], columns = ['acc_egr'], values = 'tt').reset_index()
        df_temp['min_tt'] = df_temp[acc_egr][df_temp[acc_egr] > 0].min(axis=1)
        df_temp = df_temp[['orig', 'dest', 'min_tt']]
        df_temp['tp'] = per
        perc_tod_tt.append(df_temp)
    
    else:
        continue

all_tod_tt = pd.concat(all_tod_tt)
all_tod_tt = pd.pivot(all_tod_tt, index=['orig', 'dest'], columns = ['tp'], values = 'min_tt')  


perc_tod_tt = pd.concat(perc_tod_tt)
perc_tod_tt = pd.pivot(perc_tod_tt, index=['orig', 'dest'], columns = ['tp'], values = 'min_tt')  

C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\am_WLK_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\am_KNR_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\am_PNR_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\am_WLK_TRN_PNR_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\am_WLK_TRN_KNR_TM2_Run03_Baseline.omx
['WLK_TRN_WLK', 'KNR_TRN_WLK', 'PNR_TRN_WLK', 'WLK_TRN_PNR', 'WLK_TRN_KNR']
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\md_WLK_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\md_KNR_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline\md_PNR_TRN_WLK_TM2_Run03_Baseline.omx
C:\MTC_tmpy\TM2_Run03\calibration_3332\SinglePath_TM2_Run03_Baseline

In [23]:
# Cache both minimum-travel-time tables (actual and perceived) as parquet in the pre-process folder
for parquet_name, tt_table in (
    ('tod_min_actual_travel_time.parquet', all_tod_tt),
    ('tod_min_perceived_travel_time.parquet', perc_tod_tt),
):
    tt_table.to_parquet(_join(preprocess_dir, parquet_name))

In [24]:
# Also export both tables as gzip-compressed CSV.
# The frames are indexed by (orig, dest); reset_index() turns that index into ordinary
# columns so the OD identifiers are written out. Writing with index=False alone would
# drop them and leave rows that cannot be tied back to an OD pair.
all_tod_tt.reset_index().to_csv(_join(preprocess_dir, 'tod_min_actual_travel_time.csv.gz'), index=False, compression='gzip')
perc_tod_tt.reset_index().to_csv(_join(preprocess_dir, 'tod_min_perceived_travel_time.csv.gz'), index=False, compression='gzip')

In [25]:
# Preview the minimum actual travel time table: rows are (orig, dest) pairs, columns are time periods
all_tod_tt

Unnamed: 0_level_0,tp,am,ea,ev,md,pm
orig,dest,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,,,,,
1,2,8458.0,7488.0,6727.0,5159.0,4939.0
1,3,5161.0,4761.0,4984.0,4710.0,4511.0
1,4,4358.0,5392.0,4612.0,4238.0,4133.0
1,5,4180.0,4240.0,4516.0,4145.0,4101.0
...,...,...,...,...,...,...
3332,3328,,,,,
3332,3329,26990.0,17518.0,,,16210.0
3332,3330,28082.0,,,,17280.0
3332,3331,29937.0,,,,19135.0


In [26]:
# Preview the minimum perceived travel time table: rows are (orig, dest) pairs, columns are time periods
perc_tod_tt

Unnamed: 0_level_0,tp,am,ea,ev,md,pm
orig,dest,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,1,,,,,
1,2,9330.0,7610.0,7674.0,6456.0,6520.0
1,3,6462.0,5948.0,5940.0,5959.0,5876.0
1,4,6310.0,7441.0,6177.0,6162.0,6087.0
1,5,5995.0,5680.0,5831.0,5855.0,5904.0
...,...,...,...,...,...,...
3332,3328,,,,,
3332,3329,28376.0,17119.0,,,18019.0
3332,3330,29309.0,,,,18930.0
3332,3331,32091.0,,,,21712.0
