In [None]:
import os
import warnings

import numpy as np
import openmatrix as omx
import pandas as pd
import yaml

from utility import *

warnings.filterwarnings("ignore")

In [None]:
# Load the run configuration; every setting in this cell comes from config.yaml.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

# Short aliases for the os.path helpers used when building paths.
_join, _dir, _norm = os.path.join, os.path.dirname, os.path.normpath

# --- directories and run identifiers -------------------------------------
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
# Both names point at the same configured summary directory.
summary_outputs = summary_dir = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

# --- lookup tables ---------------------------------------------------------
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

# Pre-processed inputs live in a fixed sub-folder of the CT-RAMP directory.
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# --- output layout and scaling factors ------------------------------------
perf_measure_columns = params['final_columns']
best_path_skim_extension = params['best_path_skim_extension']
annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']
filename_extension = params['filename_extension']

In [None]:
# Final column layout of every performance-measure output file.
# NOTE: this hard-coded list replaces the value read from params['final_columns']
# in the configuration cell.
perf_measure_columns = [
    # metric identification
    'Concept_ID', 'Metric_ID', 'Metric_name', 'Submetric', 'Description',
    # segmentation
    'Population', 'Period', 'Geography', 'Zone_ID', 'Origin_zone', 'Dest_zone',
    'Purpose', 'Income',
    # measured value
    'Value', 'Units', 'Total_Increment',
]

## Perceived travel time summaries (metric A1.1)

In [None]:
# Load the pre-processed time-of-day table of minimum perceived travel times.
# NOTE(review): presumably one row per origin/destination pair with one column per
# time period (a later cell selects 'orig', 'dest' and a lower-cased period) — confirm.
tod_skims = pd.read_parquet(_join(preprocess_dir, 'tod_min_perceived_travel_time.parquet'))

In [None]:
# Load the pre-processed trip roster and list its columns as a quick schema check.
all_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
all_trips.columns

In [None]:
# Move the index into regular columns so the skims can be selected and merged by
# column name in the next cell.
# NOTE: this overwrites `tod_skims`; re-running the cell on its own resets the index
# again and adds an extra 'index' column.
tod_skims = tod_skims.reset_index()
tod_skims

In [None]:
# For each time period, attach that period's perceived travel time to the trips
# made in the period.  The result is a list with one merged frame per period.
final_trips = []
for tod in time_periods:
    tod_key = tod.lower()

    # O-D pairs that have a skim value for this period, renamed to the trip-table keys.
    period_skims = (
        tod_skims[['orig', 'dest', tod_key]]
        .dropna()
        .rename(columns={'orig': 'orig_taz', 'dest': 'dest_taz', tod_key: 'perc_tt'})
    )

    period_trips = all_trips.loc[all_trips['Period'] == tod_key]

    # Inner join: trips whose O-D pair has no skim value in this period are dropped.
    final_trips.append(
        period_trips.merge(period_skims, on=['orig_taz', 'dest_taz'], how='inner')
    )

In [None]:
# Stack the per-period trip tables into a single frame.
# NOTE: this rebinds `final_trips` from a list to a DataFrame, so the cell cannot be
# re-run on its own.  Row labels from each per-period frame are kept (no
# ignore_index), so index values may repeat across periods.
final_trips = pd.concat(final_trips)

In [None]:
# Quick look at a few merged rows (positions 1 through 4).
final_trips.iloc[1:5]

In [None]:
# Rescale the perceived travel time by 1/100.
# NOTE(review): presumably the skim values are stored multiplied by 100 — confirm
# against the pre-processing step.
#
# The unscaled value is kept in 'perc_tt_raw' and the scaled column is always derived
# from it, so re-running this cell no longer divides an already-scaled column by 100
# a second time.
if 'perc_tt_raw' not in final_trips.columns:
    final_trips['perc_tt_raw'] = final_trips['perc_tt']
final_trips['perc_tt'] = final_trips['perc_tt_raw'] / 100

In [None]:
# Metric definitions for A1.1, keyed by the trip column to summarise.
# Each value is indexed positionally by the summary cell below:
#   [0] description text, [1] Metric_ID, [2] Units, [3] Metric_name.
summary_cols = params['description_a1.1']
summary_cols

In [None]:
# Column order of every summary frame built in this cell.  It matches the order the
# regional frame had previously, so a plain concat of the six results is unchanged.
_SUMMARY_FRAME_COLUMNS = [
    'Period', 'Income', 'Value', 'Population', 'Origin_zone', 'Dest_zone',
    'Zone_ID', 'Concept_ID', 'Geography', 'Purpose', 'Total_Increment',
    'Metric_ID', 'Submetric', 'Description', 'Units', 'Metric_name',
]


def _weighted_average_summary(trips_df, weighted_col, weight_col, metric_info,
                              geography, submetric_suffix, where,
                              zone_cols=None, population='Whole Population'):
    """Build one summary frame of weighted averages by Period and Income.

    Value = sum(weighted_col) / sum(weight_col) within each group.

    Parameters
    ----------
    trips_df : DataFrame holding 'Period', 'Income', `weighted_col`, `weight_col`
        and, when given, the two `zone_cols`.
    weighted_col : column containing value * weight for each row.
    weight_col : column containing the weight (number of trips) for each row.
    metric_info : sequence read positionally as
        [0] description text, [1] Metric_ID, [2] Units, [3] Metric_name.
    geography : label written to the 'Geography' column.
    submetric_suffix : appended to the Metric_ID to form 'Submetric' (e.g. '.1').
    where : tail of the 'Description' text (e.g. ' in the region').
    zone_cols : optional (origin column, destination column) pair to group by in
        addition to Period/Income; reported as 'Origin_zone'/'Dest_zone'.
        When omitted both zone columns are blank strings.
    population : label written to the 'Population' column.

    Returns
    -------
    DataFrame with the columns listed in _SUMMARY_FRAME_COLUMNS.
    `concept_id` is read from the notebook's configuration cell.
    """
    zone_cols = list(zone_cols or [])
    totals = (
        trips_df.groupby(zone_cols + ['Period', 'Income'])[[weighted_col, weight_col]]
        .sum()
        .reset_index()
    )

    summary = totals[['Period', 'Income']].copy()
    summary['Value'] = totals[weighted_col] / totals[weight_col]
    summary['Population'] = population
    summary['Origin_zone'] = totals[zone_cols[0]] if zone_cols else ''
    summary['Dest_zone'] = totals[zone_cols[1]] if zone_cols else ''
    summary['Zone_ID'] = ''
    summary['Concept_ID'] = concept_id
    summary['Geography'] = geography
    summary['Purpose'] = ''
    summary['Total_Increment'] = ''
    summary['Metric_ID'] = metric_info[1]
    summary['Submetric'] = metric_info[1] + submetric_suffix
    # Every description starts with 'Average ' (the transbay text used to be built
    # without the separating space).
    summary['Description'] = 'Average ' + metric_info[0] + where
    summary['Units'] = metric_info[2]
    summary['Metric_name'] = metric_info[3]
    return summary[_SUMMARY_FRAME_COLUMNS]


# Trip attributes needed for grouping and weighting, copied ONCE into a private
# frame.  The previous code aliased `final_trips` and added/overwrote columns on it.
_GROUPING_COLUMNS = [
    'Period', 'Income', 'trips', 'transbay_od',
    'orig_county', 'dest_county',
    'orig_rdm_zones', 'dest_rdm_zones',
    'orig_super_dist', 'dest_super_dist',
]
trip_weights = final_trips[_GROUPING_COLUMNS].copy()

# Priority-population trips.  pp_share is divided by 100 exactly once, on a derived
# column: the old code rescaled final_trips['pp_share'] in place on every loop
# iteration and on every re-run of the cell.  (The averages themselves are
# scale-invariant in the weight, but the shared column was being corrupted.)
trip_weights['pp_trips'] = (final_trips['pp_share'] / 100) * final_trips['trips']

pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []

for value_col in summary_cols:
    metric_info = summary_cols[value_col]

    # Trip-weighted and priority-population-weighted value for this metric column.
    trip_weights['weighted_value'] = final_trips[value_col] * final_trips['trips']
    trip_weights['pp_weighted_value'] = trip_weights['pp_trips'] * final_trips[value_col]

    # regional value
    reg_df.append(_weighted_average_summary(
        trip_weights, 'weighted_value', 'trips', metric_info,
        geography='Regional', submetric_suffix='.1', where=' in the region'))

    # transbay region: only trips flagged as transbay O-D pairs
    tb_df.append(_weighted_average_summary(
        trip_weights[trip_weights['transbay_od'] == 1], 'weighted_value', 'trips', metric_info,
        geography='Transbay', submetric_suffix='.2', where=' in the transbay region'))

    # county
    county_df.append(_weighted_average_summary(
        trip_weights, 'weighted_value', 'trips', metric_info,
        geography='County', submetric_suffix='.3',
        where=' in the origin and destination county',
        zone_cols=('orig_county', 'dest_county')))

    # RDM zones
    rdm_df.append(_weighted_average_summary(
        trip_weights, 'weighted_value', 'trips', metric_info,
        geography='RDM', submetric_suffix='.4',
        where=' in the origin and destination RDM zones',
        zone_cols=('orig_rdm_zones', 'dest_rdm_zones')))

    # super district
    sd_df.append(_weighted_average_summary(
        trip_weights, 'weighted_value', 'trips', metric_info,
        geography='Superdistrict', submetric_suffix='.5',
        where=' in the origin and destination Super district',
        zone_cols=('orig_super_dist', 'dest_super_dist')))

    # priority population (weights are priority-population trips)
    # NOTE(review): the 'Prioirty Population' label is misspelled but kept
    # byte-for-byte because downstream consumers may match on it — confirm before fixing.
    pp_df.append(_weighted_average_summary(
        trip_weights, 'pp_weighted_value', 'pp_trips', metric_info,
        geography='Regional', submetric_suffix='.6', where=' in the region',
        population='Prioirty Population'))

pp_df = pd.concat(pp_df)
reg_df = pd.concat(reg_df)
county_df = pd.concat(county_df)
sd_df = pd.concat(sd_df)
rdm_df = pd.concat(rdm_df)
tb_df = pd.concat(tb_df)

In [None]:
#reg_df

In [None]:
# Write one CSV per summary frame plus one combined CSV, all in the final
# performance-measure column layout.
metric_name = 'perceived_travel_time_'
all_dfs = [reg_df, tb_df, county_df, rdm_df, sd_df, pp_df]

for dfs in all_dfs:
    dfs = dfs.reset_index(drop=True)[perf_measure_columns]
    # The file is named after the Submetric of the first row; a frame holding
    # several metrics is still written to that single file.
    file_name = dfs['Submetric'].iloc[0]
    dfs.to_csv(_join(summary_dir, metric_name + file_name + filename_extension + '.csv'), index=False)
    # One-line sanity check per file: row count, submetric, metric name, sum of values.
    print(len(dfs), file_name, dfs['Metric_name'].iloc[0], dfs['Value'].sum())

# The combined file previously kept whatever column order the concat produced;
# it is now written in the same layout as the per-submetric files.
combined_df = pd.concat(all_dfs).reset_index(drop=True)[perf_measure_columns]
combined_df.to_csv(_join(summary_dir, metric_name + 'A1.1' + filename_extension + '.csv'), index=False)

In [None]:
#county_df