In [17]:
import openmatrix as omx
import pandas as pd
import numpy as np
import yaml
from utility import *
import warnings
warnings.filterwarnings("ignore")

In [30]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

In [19]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
person_file = _join(ctramp_dir, 'main\\personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
hh = hh.rename(columns = {'taz': 'home_zone'})

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

In [32]:
def skim_core_to_df(skim, core, cols =['orig', 'dest', 'rail_od']):
    skim_df = pd.DataFrame(skim[core])
    skim_df = pd.melt(skim_df.reset_index(), id_vars='index', value_vars=skim_df.columns)
    skim_df['index'] = skim_df['index'] + 1
    skim_df['variable'] = skim_df['variable'] + 1
    skim_df.columns = cols

    return skim_df

def array2df(array, cols =['orig', 'dest', 'rail_od']):
    df = pd.DataFrame(array)
    df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
    df['index'] = df['index'] + 1
    df['variable'] = df['variable'] + 1
    df.columns = cols
    
    return df

def weighted_average(df, value, weight):
    val = df[value]
    wt = df[weight]
    return (val * wt).sum() / wt.sum()

def rename_columns(df, old_cols, new_cols=['Orig_zone', 'Dest_zone', 'Value']):
    """
    Renames columns of a pandas dataframe using lists of old and new column names.

    Args:
    df (pandas.DataFrame): The dataframe to rename columns in.
    old_cols (list): A list of old column names.
    new_cols (list): A list of new column names.

    Returns:
    pandas.DataFrame: The renamed dataframe.
    """

    # Use pandas' rename method to rename columns
    df = df.rename(columns=dict(zip(old_cols, new_cols)))

    return df

In [3]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']

skims_dir = _join(model_outputs_dir, "skims")
landuse_dir = _join(model_outputs_dir, "landuse")

#hwyskmMD = _join(params['best_path_skim_dir'], 'am_KNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx')

#transit skims - names
#acc_egr = params['access_egress_modes']
#transit_skim_files = []
#for per in params['periods']:
#    for acc in acc_egr:
#        file_name = _join(params['best_path_skim_dir'], per+acc+'_v9_1_release11302022_bestpathresults.omx')
#        transit_skim_files.append(file_name)

transit_skim_files = [r'C:\Users\vyadav\Cambridge Systematics\PROJ 210071 BART Link21 TDLU Modeling - Documents\Task 2 - Model Dev\2.3 - Model Construction\Performance Metrics\Model Outputs\TM2_09172022\Best Single Path\am_KNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx']
#cores - 'BOARDS', 'DDIST', 'DTIME', 'FARE', 'IVT', 'IVTCOM', 'IVTEXP', 'IVTFRY', 'IVTHVY', 'IVTLOC', 'IVTLRT',
# 'IWAIT', 'PIVTCOM', 'PIVTEXP', 'PIVTFRY', 'PIVTHVY', 'PIVTLOC', 'PIVTLRT', 'WACC', 'WAIT', 'WAUX', 'WEGR', 'XWAIT'

summary_outputs = params['summary_dir']

#demand matrices for active highway and transit
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
highway_demand_dir = _join(demand_matrices_dir, "highway", "household")
active_demand_dir = _join(demand_matrices_dir, "active")
transit_demand_dir = _join(demand_matrices_dir, "transit")

#
perceived_tt_cores = params['perceived_travel_time']
time_periods = params['periods']
acc_egr = params['access_egress_modes']

In [4]:
#Read data

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

#get geogrpahies and priority population in the same file
geo_pp_cwks = pd.merge(geo_cwks, pp_perc, on = 'taz', how = 'left') 

#transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

#non work destinations tazs
non_work_tazs = pd.read_excel(_join(params['common_dir'], 'non_work_destinations.xlsx'))
#non_work_tazs = list(non_work_tazs['non_wrk_taz'])

In [8]:
%%time
# find the best path with lowest travel time for each time period

all_tod_tt = []
for per in time_periods:
    
    tt_df = []
    for acc in acc_egr:
        file_name = _join(params['best_path_skim_dir'], per+'_'+acc+'_v9_1_release11302022_bestpathresults.omx')
        if os.path.exists(file_name):
            print(file_name)
            skim = omx.open_file(file_name)
            
            # empty matrix 
            mat_core = np.zeros(skim.shape())
            
            # iterate over all cores to get total travel time
            for core in perceived_tt_cores:
                mat_core = mat_core + np.array(skim[core])
                #print(mat_core.sum())

            df = pd.DataFrame(mat_core)
            df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
            df['index'] = df['index'] + 1
            df['variable'] = df['variable'] + 1
            df.columns = ['orig', 'dest', 'tt']
            df['acc_egr'] = acc
            
            tt_df.append(df)
        else:
            print(f'{file_name} doesn\'t exist')
    
    if len(tt_df)>0:
        df_temp = pd.concat(tt_df)
        df_temp = pd.pivot(df_temp, index=['orig', 'dest'], columns = ['acc_egr'], values = 'tt').reset_index()
        df_temp['min_tt'] = df_temp[acc_egr][df_temp[acc_egr] > 0].min(axis=1)
        df_temp = df_temp[['orig', 'dest', 'min_tt']]
        df_temp['tp'] = per
        all_tod_tt.append(df_temp)
    
    else:
        continue

all_tod_tt = pd.concat(all_tod_tt)
all_tod_tt = pd.pivot(all_tod_tt, index=['orig', 'dest'], columns = ['tp'], values = 'min_tt')  

C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\am_WLK_TRN_WLK_v9_1_release11302022_bestpathresults.omx
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\am_KNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\am_PNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\am_WLK_TRN_PNR_v9_1_release11302022_bestpathresults.omx
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\am_WLK_TRN_KNR_v9_1_release11302022_bestpathresults.omx
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\md_WLK_TRN_WLK_v9_1_release11302022_bestpathresults.omx doesn't exist
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\md_KNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx doesn't exist
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\md_PNR_TRN_WLK_v9_1_release11302022_bestpathresults.omx doesn't exist
C:\VY-Projects\Link21\BaseYear2015\Best_Single_Path\md_WLK_TRN_PNR_v9_1_release11302022_bestpa

In [9]:
all_tod_tt['md'] = all_tod_tt['am']
all_tod_tt['ev'] = all_tod_tt['am']
all_tod_tt['pm'] = all_tod_tt['am']
all_tod_tt['ea'] = all_tod_tt['am']

all_tod_tt.to_parquet(_join(params['best_path_skim_dir'], 'tod_min_perceived_travel_time.parquet'))

In [4]:
def repeat_string(string, n):
    return [string] * n

In [5]:
period = params['periods'] #['EA', 'AM', 'MD', 'PM', 'EV']
acc_egg_modes = params['access_egress_modes'] #['KNR_TRN_WLK', 'PNR_TRN_WLK', 'WLK_TRN_KNR', 'WLK_TRN_PNR', 'WLK_TRN_WLK']

In [10]:
all_tod_tt = pd.read_parquet(_join(params['best_path_skim_dir'], 'tod_min_perceived_travel_time.parquet'))

## Final Code

In [10]:
tod_skims = pd.read_parquet(_join(params['best_path_skim_dir'], 'tod_min_perceived_travel_time.parquet'))

In [21]:
all_trips = pd.read_parquet(os.path.join(r'C:\VY-Projects\Link21\BaseYear2015', 'trips.parquet'))
all_trips.columns

Index(['hh_id', 'tour_id', 'stop_id', 'inbound', 'tour_purpose',
       'orig_purpose', 'dest_purpose', 'orig_taz', 'dest_taz', 'depart_hour',
       'trip_mode', 'tour_mode', 'tour_category', 'tours', 'transbay_od',
       'orig_rdm_zones', 'orig_super_dist', 'orig_county', 'dest_rdm_zones',
       'dest_super_dist', 'dest_county', 'home_zone', 'pp_share',
       'new_dest_purp', 'new_orig_purp', 'link21_tour_purp',
       'link21_orig_purp', 'link21_dest_purp', 'link21_trip_purp'],
      dtype='object')

In [13]:
tod_skims = tod_skims.reset_index()
tod_skims

tp,orig,dest,am,md,ev,pm,ea
0,1,1,,,,,
1,1,2,5615.0,5615.0,5615.0,5615.0,5615.0
2,1,3,3812.0,3812.0,3812.0,3812.0,3812.0
3,1,4,5300.0,5300.0,5300.0,5300.0,5300.0
4,1,5,5278.0,5278.0,5278.0,5278.0,5278.0
...,...,...,...,...,...,...,...
11102219,3332,3328,,,,,
11102220,3332,3329,13103.0,13103.0,13103.0,13103.0,13103.0
11102221,3332,3330,13679.0,13679.0,13679.0,13679.0,13679.0
11102222,3332,3331,15534.0,15534.0,15534.0,15534.0,15534.0


In [24]:
all_trips['Period'] = all_trips['depart_hour'].map(time_period_mapping)
all_trips['Mode'] = all_trips['trip_mode'].map(mode_cat_mapping)

In [25]:
all_trips['Period'] 

0           md
1           md
2           md
3           pm
4           md
          ... 
3842941     am
3842942     md
3842943     md
3842944    NaN
3842945     pm
Name: Period, Length: 3842946, dtype: object

In [27]:
final_trips = []
for periods in time_periods:
    tp_df = all_trips[all_trips['Period'] == periods.lower()]
    tod_skims_df =  tod_skims[['orig', 'dest', periods.lower()]]
    tod_skims_df = tod_skims_df.dropna()
    tod_skims_df.columns = ['orig_taz', 'dest_taz', 'perc_tt']
    tp_df = pd.merge(tp_df, tod_skims_df, on = ['orig_taz', 'dest_taz'], how='inner')
    final_trips.append(tp_df)

In [28]:
final_trips = pd.concat(final_trips)

In [29]:
final_trips

Unnamed: 0,hh_id,tour_id,stop_id,inbound,tour_purpose,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,...,pp_share,new_dest_purp,new_orig_purp,link21_tour_purp,link21_orig_purp,link21_dest_purp,link21_trip_purp,Period,Mode,perc_tt
0,1452884,11,-1,0,atwork_eat,Work,atwork,53,79,9,...,0.0,eatout,Work,social,work,social,work,am,Walk,1135.0
1,1247603,0,-1,0,work_very high,Home,work,53,79,7,...,0.0,work,Home,work,home,work,work,am,Walk,1135.0
2,1494614,11,-1,0,atwork_eat,Work,atwork,2343,2300,7,...,0.0,eatout,Work,social,work,social,work,am,Auto_SOV,3749.0
3,1221637,11,-1,0,atwork_maint,Work,atwork,73,26,9,...,0.0,othmaint,Work,othmaint,work,othmaint,work,am,Walk,4990.0
4,1221637,11,-1,0,atwork_business,Work,atwork,66,79,9,...,0.0,business,Work,business,work,business,business,am,Walk,1411.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62639,2768929,0,0,0,othdiscr,Home,shopping,3168,3248,5,...,0.0,shopping,Home,othdiscr,home,shopping,shopping,ea,Auto_2Person,37111.0
62640,2785615,0,-1,0,othdiscr,Home,othdiscr,3221,3136,5,...,0.0,othdiscr,Home,othdiscr,home,othdiscr,othdiscr,ea,Auto_3+Person,57123.0
62641,2790170,0,-1,0,social,Home,social,3251,3214,5,...,100.0,social,Home,social,home,social,social,ea,Auto_2Person,43492.0
62642,1019698,0,-1,0,shopping,Home,shopping,3267,3293,5,...,0.0,shopping,Home,shopping,home,shopping,shopping,ea,Auto_2Person,19689.0


In [33]:
# summarise for prioirty population
summary_cols = params['description_a1.1']

time_periods = ['AM']


pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []

for columns in summary_cols:

    for period in time_periods:

        df_temp = final_trips[final_trips['Period'] == period.lower()]

        #prioirty population
        df_temp['pp_share'] = df_temp['pp_share']/100
        pp_value = weighted_average(df_temp, columns, 'pp_share')
        pp_df_temp = pd.DataFrame({'Population': 'Prioirty Population',
                              'Period': period,
                              'Value': pp_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.2',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        pp_df.append(pp_df_temp)

        #regional value
        region_value = df_temp[columns].mean()
        reg_df_temp = pd.DataFrame({'Population': 'Whole Population',
                              'Period': period,
                              'Value': region_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.1',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        reg_df.append(reg_df_temp)

        #county 
        county_df_temp = df_temp.groupby(['orig_county', 'dest_county'])[columns].mean().reset_index()
        county_df_temp = rename_columns(county_df_temp, ['orig_county', 'dest_county', columns])

        county_df_temp['Period'] = period
        county_df_temp['Population'] = 'Whole Population'
        county_df_temp['Zone_Id'] = ''
        county_df_temp['Geography'] = 'County'
        county_df_temp['Description'] = summary_cols[columns][0]
        county_df_temp['Metric'] = summary_cols[columns][1]
        county_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
        county_df_temp['Units'] = summary_cols[columns][2]
        county_df_temp['Metric_name'] = summary_cols[columns][3]
        county_df.append(county_df_temp)

        #super district
        sd_df_temp = df_temp.groupby(['orig_super_dist', 'dest_super_dist'])[columns].mean().reset_index()
        sd_df_temp = rename_columns(sd_df_temp, ['orig_super_dist', 'dest_super_dist', columns])

        sd_df_temp['Period'] = period
        sd_df_temp['Population'] = 'Whole Population'
        sd_df_temp['Zone_ID'] = ''
        sd_df_temp['Geography'] = 'Superdistrict'
        sd_df_temp['Description'] = summary_cols[columns][0]
        sd_df_temp['Metric'] = summary_cols[columns][1]
        sd_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
        sd_df_temp['Units'] = summary_cols[columns][2]
        sd_df_temp['Metric_name'] = summary_cols[columns][3]
        sd_df.append(sd_df_temp)

        #RDM Zones
        rdm_df_temp = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones'])[columns].mean().reset_index()
        rdm_df_temp =  rename_columns(rdm_df_temp, ['orig_rdm_zones', 'dest_rdm_zones', columns])

        rdm_df_temp['Period'] = period
        rdm_df_temp['Population'] = 'Whole Population'
        rdm_df_temp['Zone_ID'] = ''
        rdm_df_temp['Geography'] = 'RDM'
        rdm_df_temp['Description'] = summary_cols[columns][0]     
        rdm_df_temp['Metric'] = summary_cols[columns][1]
        rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
        rdm_df_temp['Units'] = summary_cols[columns][2]
        rdm_df_temp['Metric_name'] = summary_cols[columns][3]
        rdm_df.append(rdm_df_temp)


        # transbay region
        tb_value = df_temp[df_temp['transbay_od']==1]
        tb_value = tb_value[columns].mean()

        tb_df_temp = pd.DataFrame({'Population': 'Whole Population',
                                   'Period': period,
                                   'Value': tb_value,
                                   'Orig_zone' : '',
                                   'Dest_zone' : '',
                                   'Zone_ID' : 'Megaregion',
                                   'Geography' : 'Transbay',
                                   'Metric':summary_cols[columns][1],
                                   'Submetric':summary_cols[columns][1]+'.6',
                                   'Description' : summary_cols[columns][0],
                                   'Units' : summary_cols[columns][2],
                                   'Metric_name' : summary_cols[columns][3]}, index=[0])

        tb_df.append(tb_df_temp)                     

    
pp_df = pd.concat(pp_df)
reg_df = pd.concat(reg_df)
county_df = pd.concat(county_df)
sd_df = pd.concat(sd_df)
rdm_df = pd.concat(rdm_df)
tb_df = pd.concat(tb_df)

In [34]:
reg_df

Unnamed: 0,Population,Period,Value,Orig_zone,Dest_zone,Zone_ID,Geography,Metric,Submetric,Description,Units,Metric_name
0,Whole Population,AM,13005.807196,,,Megaregion,Regional,A1.1,A1.1.1,perceived total travel time,seconds,perceived travel travel time


In [35]:
county_df

Unnamed: 0,Orig_zone,Dest_zone,Value,Period,Population,Zone_Id,Geography,Description,Metric,Submetric,Units,Metric_name
0,1,1,12612.218646,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
1,1,2,10835.754505,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
2,1,3,13409.279709,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
3,1,4,12057.603835,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
4,1,5,12384.707753,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
...,...,...,...,...,...,...,...,...,...,...,...,...
76,9,5,15797.380441,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
77,9,6,14361.889624,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
78,9,7,12337.643185,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
79,9,8,13699.930102,AM,Whole Population,,County,perceived total travel time,A1.1,A1.1.5,seconds,perceived travel travel time
