In [None]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
#import pantab

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the run configuration; every setting below is read from this mapping.
with open('config.yaml', 'r') as config_file:
    params = yaml.safe_load(config_file)

# Short aliases for the os.path helpers used when building paths.
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# --- directories and run identifiers ---
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

# --- lookup tables and category lists ---
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egr = params['access_egress_modes']
acc_egg_modes = acc_egr  # second name bound to the same config entry

income_categories_bins = params['income_categories_bins']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']

# --- skim settings ---
perceived_tt_cores = params['perceived_travel_time']
best_path_skim_extension = params['best_path_skim_extension']
actual_tt_cores = params['total_travel_time']

In [None]:
# Directory holding the CT-RAMP tour and trip outputs of this model run.
household_model_dir = _join(model_outputs_dir, "main")

# Input household and person data for the configured iteration.
# Every path component is handed to os.path.join separately so the separator
# is chosen by the operating system instead of a hard-coded Windows backslash.
person_file = _join(ctramp_dir, 'main', 'personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main', 'householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

# Only the household id and its TAZ are read; the TAZ is exposed as home_zone.
hh = pd.read_csv(household_file, usecols=['hh_id', 'taz'])
hh = hh.rename(columns={'taz': 'home_zone'})

# TAZ to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv"))  # columns taz, rdm_zones, super_district, county

# TAZ to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx"))  # columns = taz, pp_share

# transbay od pairs (currently disabled)
#transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

# Demand matrix and skim locations.
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

perf_measure_columns = params['final_columns']

# NOTE: `period` holds the full list of periods here; the rail-OD loop further
# down reuses the same name as its loop variable and overwrites it.
period = params['periods']

In [None]:
# Trip roster written by the pre-processing step.
trip_roster_path = _join(preprocess_dir, 'trip_roster.parquet')
df_trips = pd.read_parquet(trip_roster_path)

# Give every trip a sequential id (0 .. n-1); it is used later to tell which
# transit trips were matched to a rail OD pair.
df_trips['id'] = range(len(df_trips))

# Split the roster by trip_mode code.
other_mode_codes = [1, 2, 3, 4, 5, 9]  # auto trips, non-motorized modes, ride hail
transit_mode_codes = [6, 7, 8]         # presumably the transit modes -- confirm against the mode mapping
is_other_mode = df_trips['trip_mode'].isin(other_mode_codes)
is_transit_mode = df_trips['trip_mode'].isin(transit_mode_codes)
df_oth = df_trips.loc[is_other_mode]
df_trn = df_trips.loc[is_transit_mode]

In [None]:
# Sanity check: (rows, columns) of df_oth, df_trn and the full trip roster.
df_oth.shape, df_trn.shape, df_trips.shape

In [None]:
%%time

def _match_rail_od(trips, skim_file, core_name):
    """Keep only the trips whose OD pair is flagged in one rail-OD skim core.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trips with ``orig_taz`` and ``dest_taz`` columns.
    skim_file
        Open OMX file for the current period.
    core_name : str
        Name of the core to read through ``skim_core_to_df``.

    Returns
    -------
    pandas.DataFrame
        Inner join of ``trips`` with the core's OD pairs whose ``rail_od``
        value is greater than zero. The skim columns (``orig``, ``dest``,
        ``rail_od``) are carried along in the result.
    """
    rail_od = skim_core_to_df(skim_file, core_name)
    rail_od = rail_od[rail_od['rail_od'] > 0]
    return pd.merge(trips, rail_od,
                    left_on=['orig_taz', 'dest_taz'],
                    right_on=['orig', 'dest'],
                    how='inner')


# (trip Mode, core used for inbound trips, core used for outbound trips)
_DRIVE_ACCESS_CORES = [
    ('PNR_TRANSIT', 'WLK_TRN_PNR', 'PNR_TRN_WLK'),
    ('KNR_TRANSIT', 'WLK_TRN_KNR', 'KNR_TRN_WLK'),
]

df_temp = []

for period in time_periods:
    print(f'processing - {period}')

    df_od_pr = omx.open_file(_join(preprocess_dir, "rail_od_v9_trim_" + period.upper() + ".omx"))
    try:
        df_trn_pd = df_trn[df_trn['Period'] == period]

        # Walk transit: one core regardless of trip direction.
        matched = [_match_rail_od(df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT'],
                                  df_od_pr, 'WLK_TRN_WLK')]

        # PNR / KNR transit: the core depends on the trip direction.
        for mode_name, inbound_core, outbound_core in _DRIVE_ACCESS_CORES:
            df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == mode_name]
            is_inbound = df_trn_acc['inbound'] == 1
            # inbound == 1: returning home
            matched.append(_match_rail_od(df_trn_acc[is_inbound], df_od_pr, inbound_core))
            # inbound != 1: every other trip of this mode
            matched.append(_match_rail_od(df_trn_acc[~is_inbound], df_od_pr, outbound_core))
    finally:
        # Release the file handle for this period even if a step above fails.
        df_od_pr.close()

    df_temp.append(pd.concat(matched, ignore_index=True))

# Stack all periods; the index restarts at 0 within each period.
df_trn_rail = pd.concat(df_temp)

In [None]:
# Sanity check: (rows, columns) of the transit trips matched to a rail OD pair.
df_trn_rail.shape

In [None]:
# Transit trips that were not matched to a rail OD pair: every row of df_trn
# whose id does not appear in df_trn_rail.
rail_trips_id = list(df_trn_rail['id'])
is_rail_trip = df_trn['id'].isin(rail_trips_id)
df_trn_nonrail = df_trn[~is_rail_trip]

In [None]:
df_trn_nonrail.shape

In [None]:
# Recode transit trips by whether they matched a rail OD pair.
# assign() returns a new frame, so the filtered slice df_trn_nonrail is never
# written to in place (no SettingWithCopyWarning) and the cell can be re-run.
df_trn_nonrail = df_trn_nonrail.assign(trip_mode=10)  # transit trips without a rail OD match
df_trn_rail = df_trn_rail.assign(trip_mode=11)        # transit trips matched to a rail OD pair

In [None]:
# Rebuild the full trip table from its three segments with a fresh 0..n-1 index,
# then show its (rows, columns) shape.
trip_segments = [df_oth, df_trn_nonrail, df_trn_rail]
df_trips = pd.concat(trip_segments, ignore_index=True)
df_trips.shape

In [None]:
# Helper columns introduced by the rail-OD matching that are not needed downstream.
# The trip id column is named 'id' (no leading space); the previous ' id' entry
# could never match it, so the column was silently kept.
drop_cols = ['rail_mode', 'orig', 'dest', 'rail_od', 'id']

# Only drop what is actually present so the cell can be re-run safely.
present_cols = [col for col in drop_cols if col in df_trips.columns]
for col in present_cols:
    print(f'deleting columns {col}')
df_trips = df_trips.drop(columns=present_cols)
        

In [None]:
# Trip purpose label -> integer code; codes are assigned 1..8 in the listed order.
_purpose_labels = ["work", "shopping", "escort", "social",
                   "school", "othdiscr", "othmaint", "business"]
trip_purp_dict = {label: code for code, label in enumerate(_purpose_labels, start=1)}

# Name of the melted trips column -> priority_population flag.
pp_dict = dict(equity_trips=1, nonequity_trips=0)

# Output dtypes shared by both summary tables: int16 for the coded columns,
# float32 for the trip totals.
_int16_columns = ['orig_super_dist', 'dest_super_dist',
                  'orig_county', 'dest_county',
                  'transbay_od',
                  'trip_purpose', 'trip_mode', 'depart_hour',
                  'priority_population']
_shared_dtypes = {**dict.fromkeys(_int16_columns, 'int16'), 'trips': 'float32'}

# The two tables differ only in the name of the run-label column.
dtype = {**_shared_dtypes, 'scenario': str}
dtype_all = {**_shared_dtypes, 'model_run': str}

In [None]:
# Translate the purpose label in link21_trip_purp into its integer code;
# labels missing from trip_purp_dict become NaN.
purpose_codes = df_trips['link21_trip_purp'].map(trip_purp_dict)
df_trips = df_trips.assign(trip_purpose=purpose_codes)

In [None]:
%%time
# Dimensions of the TAZ-level trip table.
taz_keys = ['orig_taz', 'dest_taz',
            'orig_super_dist', 'dest_super_dist',
            'orig_county', 'dest_county',
            'transbay_od',
            'trip_purpose', 'trip_mode', 'depart_hour']

# Total trips per dimension combination and pp_share value.
trips_by_taz = df_trips.groupby(taz_keys + ['pp_share'])['trips'].sum().reset_index()

# Split each total using pp_share (a percentage) into equity / non-equity trips.
trips_by_taz['equity_trips'] = trips_by_taz['trips'] * trips_by_taz['pp_share'] / 100
trips_by_taz['nonequity_trips'] = trips_by_taz['trips'] - trips_by_taz['equity_trips']

# Wide -> long: one row per dimension combination and equity class.
nonzero_by_taz = trips_by_taz[trips_by_taz['trips'] != 0].drop(columns=['pp_share', 'trips'])
df_all = nonzero_by_taz.melt(id_vars=taz_keys,
                             value_vars=['equity_trips', 'nonequity_trips'],
                             var_name='eq_trips',
                             value_name='trips')

df_all['priority_population'] = df_all['eq_trips'].map(pp_dict)
df_all = df_all.drop(columns='eq_trips')

# Label the run, round to whole trips and discard rows that round to zero.
df_all['model_run'] = concept_id
df_all['trips'] = df_all['trips'].round()
df_all = df_all.loc[df_all['trips'] > 0].reset_index(drop=True)

df_all = df_all.astype(dtype_all)
df_all = df_all[['model_run'] + taz_keys + ['priority_population', 'trips']]

print("Writing CSV file")
trips_csv_path = _join(summary_outputs, "trips_" + concept_id + ".csv.gz")
df_all.to_csv(trips_csv_path, index=False, compression='gzip')

In [None]:
# Dimensions of the dashboard table (no TAZ-level detail).
dashboard_keys = ['orig_super_dist', 'dest_super_dist',
                  'orig_county', 'dest_county', 'transbay_od',
                  'trip_purpose', 'trip_mode', 'depart_hour']

# Total trips per dimension combination and pp_share value.
trips_by_district = df_trips.groupby(dashboard_keys + ['pp_share'])['trips'].sum().reset_index()

# Split each total using pp_share (a percentage) into equity / non-equity trips.
trips_by_district['equity_trips'] = trips_by_district['trips'] * trips_by_district['pp_share'] / 100
trips_by_district['nonequity_trips'] = trips_by_district['trips'] - trips_by_district['equity_trips']

# Wide -> long: one row per dimension combination and equity class.
nonzero_by_district = (trips_by_district[trips_by_district['trips'] != 0]
                       .drop(columns=['pp_share', 'trips']))
df = nonzero_by_district.melt(id_vars=dashboard_keys,
                              value_vars=['equity_trips', 'nonequity_trips'],
                              var_name='eq_trips',
                              value_name='trips')

df['priority_population'] = df['eq_trips'].map(pp_dict)
df = df.drop(columns='eq_trips')

# Label the run, round to whole trips and discard rows that round to zero.
df['scenario'] = concept_id
df['trips'] = df['trips'].round()
df = df.loc[df['trips'] > 0].reset_index(drop=True)

df = df.astype(dtype)

print("Writing trips for tableau dashboard")
dashboard_path = _join(preprocess_dir, 'trip_dashboard_' + concept_id + '.parquet')
df.to_parquet(dashboard_path)

In [None]:
# Compare total trips in the TAZ-level table and the dashboard table; both are
# rounded per row at different aggregation levels, so the totals may differ.
df_all['trips'].sum(), df['trips'].sum()

In [None]:
# Number of rows (not trips) per trip_mode code in the TAZ-level table.
df_all['trip_mode'].value_counts()