In [None]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the run configuration; every setting below is read from config.yaml.
with open('config.yaml', 'r') as config_file:
    params = yaml.safe_load(config_file)

# Short aliases for the os.path helpers used throughout the notebook.
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# --- directories -----------------------------------------------------------
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
transit_skims_dir = _join(skims_dir, "transit")
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

ctramp_dir = params['ctramp_dir']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# Both names point at the same configured summary directory.
summary_outputs = params['summary_dir']
summary_dir = params['summary_dir']

# --- run identifiers -------------------------------------------------------
concept_id = params['concept_id']
iteration = params['iteration']
filename_extension = params['filename_extension']

# --- lookups and category mappings -----------------------------------------
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
acc_egg_modes = params['access_egress_modes']

# `time_periods` and `period` are the same configured list of periods.
# `period` is later rebound by the `for period in time_periods` loop.
time_periods = params['periods']
period = params['periods']

# Column layout shared by every performance-measure table written out.
perf_measure_columns = params['final_columns']

# --- annualisation factors -------------------------------------------------
annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']

In [None]:
# Read the trip roster parquet from the pre-process directory.
trip_roster_path = _join(preprocess_dir, 'trip_roster.parquet')
df_trips = pd.read_parquet(trip_roster_path)

In [None]:
# Keep only the trips whose trip_mode code is 6, 7 or 8
# (presumably the transit modes -- TODO confirm against the mode coding).
is_selected_mode = df_trips['trip_mode'].isin([6, 7, 8])
df_trn = df_trips.loc[is_selected_mode]

In [None]:
# Prepare the rail OD / rail crowding inputs for every configured period
# (presumably this writes the rail_od_v9_trim_* and rail_crowding_od_v9_trim_*
# OMX files that are read from preprocess_dir further down -- TODO confirm).
#
# `time_periods` is passed instead of `period`: both are read from
# params['periods'], but `period` is rebound to a single period string by the
# `for period in time_periods` loop, so re-running this cell after that loop
# would otherwise hand the helper a string instead of the list of periods.
create_rail_crowding_od_pairs(preprocess_dir, transit_skims_dir, time_periods, acc_egg_modes)

In [None]:
def _attach_rail_skims(trips, od_skims, crowd_skims, core):
    """Join the rail OD and crowding values of one skim core onto a set of trips.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip records carrying ``orig_taz`` and ``dest_taz`` columns.
    od_skims, crowd_skims :
        Open OMX files holding the rail OD and the rail crowding cores.
    core : str
        Name of the core to read from both files (e.g. ``'WLK_TRN_WLK'``).

    Returns
    -------
    pandas.DataFrame
        The trips whose OD pair has ``rail_od > 0`` and also appears in the
        crowding table, with the ``rail_od`` and ``crowd`` columns attached.
    """
    rail_od = skim_core_to_df(od_skims, core)
    rail_od = rail_od[rail_od['rail_od'] > 0]
    rail_crowd = skim_core_to_df(crowd_skims, core, cols=['orig', 'dest', 'crowd'])

    # Both joins match the trip OD zones against the skim OD zones.
    od_join = dict(left_on=['orig_taz', 'dest_taz'], right_on=['orig', 'dest'], how='inner')
    with_rail_od = pd.merge(trips, rail_od, **od_join)
    return pd.merge(with_rail_od, rail_crowd, **od_join)


# Skim cores per drive-access mode, as (inbound core, outbound core).
# Trips with inbound == 1 (returning home) use the WLK_TRN_* core; all other
# trips use the *_TRN_WLK core.
drive_access_cores = {
    'PNR_TRANSIT': ('WLK_TRN_PNR', 'PNR_TRN_WLK'),
    'KNR_TRANSIT': ('WLK_TRN_KNR', 'KNR_TRN_WLK'),
}

df_temp = []

# NOTE: this loop rebinds the module-level `period` (the configured list of
# periods) to a single period string.
for period in time_periods:
    print(f'processing - {period}')

    od_path = _join(preprocess_dir, "rail_od_v9_trim_" + period.upper() + ".omx")
    crowd_path = _join(preprocess_dir, "rail_crowding_od_v9_trim_" + period.upper() + ".omx")
    df_trn_pd = df_trn[df_trn['Period'] == period]

    # Open both OMX files in a `with` block so the handles are always closed
    # once the period is processed, even if a merge fails part-way through.
    with omx.open_file(od_path) as df_od_pr, omx.open_file(crowd_path) as df_od_cwd:
        # walk transit
        df_trn_wlk = _attach_rail_skims(df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT'],
                                        df_od_pr, df_od_cwd, 'WLK_TRN_WLK')
        period_frames = [df_trn_wlk]

        # PNR transit, then KNR transit (dict order), each split by direction
        for mode, (inbound_core, outbound_core) in drive_access_cores.items():
            df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == mode]
            inbound_trips = df_trn_acc[df_trn_acc['inbound'] == 1]   # returning home
            outbound_trips = df_trn_acc[df_trn_acc['inbound'] != 1]  # every other trip
            period_frames.append(pd.concat(
                [_attach_rail_skims(inbound_trips, df_od_pr, df_od_cwd, inbound_core),
                 _attach_rail_skims(outbound_trips, df_od_pr, df_od_cwd, outbound_core)],
                ignore_index=True))

    df_temp.append(pd.concat(period_frames, ignore_index=True))

# Stack the per-period results into a single rail-trip table.
df_trn_rail = pd.concat(df_temp)
print(df_trn_rail.columns)

In [None]:
# crowd_trips = trips * crowd / 100 (trips weighted by the crowd value).
df_trn_rail['crowd_trips'] = df_trn_rail['trips'].mul(df_trn_rail['crowd']).div(100)

In [None]:
# pp_trips = trips * pp_share / 100; crowd_pp_trips applies the same
# crowd / 100 weighting to pp_trips.
priority_trips = df_trn_rail['trips'].mul(df_trn_rail['pp_share']).div(100)
df_trn_rail['pp_trips'] = priority_trips
df_trn_rail['crowd_pp_trips'] = df_trn_rail['pp_trips'].mul(df_trn_rail['crowd']).div(100)

In [None]:
# Display the columns of the rail-trip table (includes the derived
# crowd_trips, pp_trips and crowd_pp_trips columns added in earlier cells).
df_trn_rail.columns

In [None]:
# A1.10.1 - regional crowding for the whole population.
# Value is the summarised crowd_trips divided by the summarised trips for each
# Period x Income row returned by summarize_all_combinations.
region_groupby = ('Period', 'Income')

region_crowd_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(region_groupby), summary_column='crowd_trips'
).rename(columns={'Value': 'crowd_trips'})

region_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(region_groupby), summary_column='trips'
).rename(columns={'Value': 'trips'})

region_value = pd.merge(region_crowd_trips, region_trips, on=list(region_groupby), how='left')
region_value['Value'] = region_value['crowd_trips'] / region_value['trips']

# Attach the metric labels, then order the columns as the output layout expects.
# NOTE(review): Units is emitted as 'minutes' although Value is a ratio of
# crowd_trips to trips -- confirm the intended unit.
region_value = region_value[['Period', 'Income', 'Value']].assign(
    Concept_ID=concept_id,
    Metric_ID='A1.10',
    Metric_name='Crowding (Region)',
    Submetric='A1.10.1',
    Description='Average regional crowding level',
    Population='Whole Population',
    Geography='Regional',
    Origin_zone='',
    Dest_zone='',
    Purpose='',
    Mode='',
    Zone_ID='',
    Units='minutes',
    Total_Increment='',
)
region_value = region_value[perf_measure_columns]

In [None]:
#final_df1.to_csv(_join(summary_dir, "A1.10.1_regional_crowding_" + concept_id + '_region' + filename_extension + ".csv"), index=False)

In [None]:
# A1.10.2 - regional crowding for the priority population.
# Same calculation as the whole-population regional value, but built from
# crowd_pp_trips and pp_trips.
pp_crowd_summary = summarize_all_combinations(df_trn_rail,
                                              groupby_columns=['Period', 'Income'],
                                              summary_column='crowd_pp_trips')
pp_crowd_summary = pp_crowd_summary.rename(columns={'Value': 'crowd_pp_trips'})

pp_trip_summary = summarize_all_combinations(df_trn_rail,
                                             groupby_columns=['Period', 'Income'],
                                             summary_column='pp_trips')
pp_trip_summary = pp_trip_summary.rename(columns={'Value': 'pp_trips'})

pp_region_value = pd.merge(pp_crowd_summary, pp_trip_summary, on=['Period', 'Income'], how='left')
pp_region_value['Value'] = pp_region_value['crowd_pp_trips'].div(pp_region_value['pp_trips'])
pp_region_value = pp_region_value[['Period', 'Income', 'Value']]

# Constant label columns for this submetric.
# NOTE(review): 'Prioirty Population' is misspelled but is written to the
# output as-is; fix it only together with whatever consumes the file.
pp_region_labels = {
    'Concept_ID': concept_id,
    'Metric_ID': 'A1.10',
    'Metric_name': 'Crowding (Region)',
    'Submetric': 'A1.10.2',
    'Description': 'Average regional crowding level',
    'Population': 'Prioirty Population',
    'Geography': 'Regional',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Mode': '',
    'Zone_ID': '',
    'Units': 'minutes',
    'Total_Increment': '',
}
for label_column, label_value in pp_region_labels.items():
    pp_region_value[label_column] = label_value

pp_region_value = pp_region_value[perf_measure_columns]

In [None]:
# A1.10.3 - crowding between RDM zone pairs for the whole population.
# Value is the summarised crowd_trips divided by the summarised trips for each
# origin RDM zone x destination RDM zone x Period x Income row.
rdm_groupby = ('orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income')

rdm_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(rdm_groupby), summary_column='trips'
).rename(columns={'Value': 'trips'})

rdm_crowd_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(rdm_groupby), summary_column='crowd_trips'
).rename(columns={'Value': 'crowd_trips'})

df_rdm = rdm_trips.merge(rdm_crowd_trips, on=list(rdm_groupby), how='left')
df_rdm = df_rdm.assign(Value=df_rdm['crowd_trips'] / df_rdm['trips'])

# Switch to the output naming for the zone columns and drop the working columns.
df_rdm = df_rdm.rename(columns={'orig_rdm_zones': 'Origin_zone',
                                'dest_rdm_zones': 'Dest_zone'})
df_rdm = df_rdm[['Origin_zone', 'Dest_zone', 'Period', 'Income', 'Value']]

# Metric labels, then the shared output column order.
df_rdm = df_rdm.assign(
    Concept_ID=concept_id,
    Metric_ID='A1.10',
    Metric_name='Crowding (Region)',
    Submetric='A1.10.3',
    Description='Average crowding level between RDM zones',
    Population='Whole Population',
    Geography='RDM',
    Zone_ID='',
    Purpose='',
    Mode='',
    Units='minutes',
    Total_Increment='',
)
df_rdm = df_rdm[perf_measure_columns]

In [None]:
# A1.10.4 - crowding between RDM zone pairs for the priority population.
rdm_pp_groupby = ('orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income')

rdm_pp_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(rdm_pp_groupby), summary_column='pp_trips'
).rename(columns={'Value': 'pp_trips'})

rdm_pp_crowd_trips = summarize_all_combinations(
    df_trn_rail, groupby_columns=list(rdm_pp_groupby), summary_column='crowd_pp_trips'
).rename(columns={'Value': 'crowd_pp_trips'})

df_rdm_pp = rdm_pp_trips.merge(rdm_pp_crowd_trips, on=list(rdm_pp_groupby), how='left')

# Only rows with a positive pp_trips total are kept, so the ratio below never
# divides by zero.
df_rdm_pp = df_rdm_pp.loc[df_rdm_pp['pp_trips'] > 0]
df_rdm_pp = df_rdm_pp.assign(Value=df_rdm_pp['crowd_pp_trips'] / df_rdm_pp['pp_trips'])

# Switch to the output naming for the zone columns and drop the working columns.
df_rdm_pp = df_rdm_pp.rename(columns={'orig_rdm_zones': 'Origin_zone',
                                      'dest_rdm_zones': 'Dest_zone'})
df_rdm_pp = df_rdm_pp[['Origin_zone', 'Dest_zone', 'Period', 'Income', 'Value']]

# Metric labels, then the shared output column order.
# NOTE(review): 'Prioirty Population' is misspelled but is written to the
# output as-is; fix it only together with whatever consumes the file.
df_rdm_pp = df_rdm_pp.assign(
    Concept_ID=concept_id,
    Metric_ID='A1.10',
    Metric_name='Crowding (Region)',
    Submetric='A1.10.4',
    Description='Average crowding level between RDM zones',
    Population='Prioirty Population',
    Geography='RDM',
    Zone_ID='',
    Purpose='',
    Mode='',
    Units='minutes',
    Total_Increment='',
)
df_rdm_pp = df_rdm_pp[perf_measure_columns]

In [None]:
# Write one CSV per submetric table, then one CSV with all four tables stacked.
all_dfs = [region_value, pp_region_value, df_rdm, df_rdm_pp]
metric_name = '_regional_crowding_'

for dfs in all_dfs:
    dfs = dfs.reset_index(drop=True)
    dfs = dfs[perf_measure_columns]

    # The file name is derived from the first row's labels, so an empty table
    # (e.g. no RDM pair with pp_trips > 0) cannot be named; skip it explicitly
    # instead of failing with a KeyError and losing the remaining outputs.
    if dfs.empty:
        print('skipping empty summary table - no rows to derive the file name from')
        continue

    file_name = dfs['Submetric'].iloc[0]
    geography = '_' + dfs['Geography'].iloc[0].replace(' ', '_')
    dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=False)
    print(len(dfs), file_name, dfs['Metric_name'].iloc[0])

# NOTE(review): the combined file is prefixed 'A10.1' while the tables carry
# Metric_ID 'A1.10' -- left unchanged in case downstream code expects this
# name; confirm whether it is a typo.
combined_df = pd.concat(all_dfs).reset_index(drop=True)
combined_df.to_csv(_join(summary_dir, 'A10.1' + '_regional_crowding_' + concept_id + '_region' + filename_extension + '.csv'), index=False)

# County-to-county table by Period.
# NOTE(review): Value here is the mean of the 'trips' column (not a crowding
# ratio) and the labels use Metric_ID 'A1.9'; nothing in the visible code
# writes this table out -- confirm whether it is still needed.
county_df_temp = (
    df_trn_rail
    .groupby(['orig_county', 'dest_county', 'Period'])['trips']
    .mean()
    .reset_index()
    .rename(columns={'trips': 'Value',
                     'orig_county': 'Origin_zone',
                     'dest_county': 'Dest_zone'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A1.9',
        Metric_name='Crowding (Region)',
        Submetric='A1.9.2',
        Description='Crowding level between origin and destination county',
        Population='Whole Population',
        Geography='County',
        Zone_ID='',
        Units='minutes',
        Total_Increment='',
    )
)


# Super-district-to-super-district table by Period.
# NOTE(review): Value here is the mean of the 'trips' column (not a crowding
# ratio) and the labels use Metric_ID 'A1.9' -- confirm this table is still needed.
sd_df_temp = (
    df_trn_rail
    .groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trips']
    .mean()
    .reset_index()
    .rename(columns={'trips': 'Value',
                     'orig_super_dist': 'Origin_zone',
                     'dest_super_dist': 'Dest_zone'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A1.9',
        Metric_name='Crowding (Region)',
        Submetric='A1.9.3',
        Description='Regional crowding level',
        Population='Whole Population',
        Geography='Super district',
        Zone_ID='',
        Units='minutes',
        Total_Increment='',
    )
)


# RDM-zone-to-RDM-zone table by Period.
# NOTE(review): Value here is the mean of the 'trips' column (not a crowding
# ratio) and the labels use Metric_ID 'A1.9' -- confirm this table is still needed.
rdm_df_temp = (
    df_trn_rail
    .groupby(['orig_rdm_zones', 'dest_rdm_zones', 'Period'])['trips']
    .mean()
    .reset_index()
    .rename(columns={'trips': 'Value',
                     'orig_rdm_zones': 'Origin_zone',
                     'dest_rdm_zones': 'Dest_zone'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A1.9',
        Metric_name='Crowding (Region)',
        Submetric='A1.9.4',
        Description='Regional crowding level',
        Population='Whole Population',
        Geography='RDM',
        Zone_ID='',
        Units='minutes',
        Total_Increment='',
    )
)




# Transbay table by Period: restricted to rows flagged transbay_od == 1.
# NOTE(review): the aggregated column keeps the name 'trips' here -- unlike the
# county / super-district / RDM tables it is not renamed to 'Value' in the
# visible code; confirm whether a rename is missing.
is_transbay = df_trn_rail['transbay_od'] == 1
tb_value = (
    df_trn_rail[is_transbay]
    .groupby(['Period'])['trips']
    .mean()
    .reset_index()
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A1.9',
        Metric_name='Crowding (Region)',
        Submetric='A1.9.5',
        Description='regional crowding level',
        Population='Whole Population',
        Geography='Transbay',
        Zone_ID='',
        Origin_zone='',
        Dest_zone='',
        Units='minutes',
        Total_Increment='',
    )
)