In [1]:
import openmatrix as omx
import pandas as pd
import numpy as np
import yaml
from utility import *
import warnings
warnings.filterwarnings("ignore")

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

preprocess_dir = _join(ctramp_dir, '_pre_process_files')
perf_measure_columns = params['final_columns']
summary_dir = params['summary_dir']

time_periods = params['periods']
perf_measure_columns = params['final_columns']

best_path_skim_extension = params['best_path_skim_extension']

## Final Code

In [3]:
tod_skims = pd.read_parquet(_join(preprocess_dir, 'tod_min_perceived_travel_time.parquet'))

In [4]:
all_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
all_trips.columns

Index(['hh_id', 'person_id', 'inbound', 'orig_taz', 'dest_taz', 'depart_hour',
       'trip_mode', 'sampleRate', 'trip_type', 'trips', 'transbay_od',
       'orig_rdm_zones', 'orig_super_dist', 'orig_county', 'dest_rdm_zones',
       'dest_super_dist', 'dest_county', 'home_zone', 'pp_share',
       'link21_trip_purp', 'Period', 'Mode'],
      dtype='object')

In [5]:
tod_skims = tod_skims.reset_index()
tod_skims

tp,orig,dest,am,ea,ev,md,pm
0,1,1,,,,,
1,1,2,7999.0,7629.0,6868.0,5315.0,5075.0
2,1,3,6538.0,5932.0,6253.0,5880.0,5602.0
3,1,4,5626.0,6186.0,5768.0,5377.0,5192.0
4,1,5,5152.0,5045.0,5422.0,5034.0,4914.0
...,...,...,...,...,...,...,...
11102219,3332,3328,,,,,
11102220,3332,3329,26842.0,17573.0,,,16286.0
11102221,3332,3330,27934.0,,,,17356.0
11102222,3332,3331,29789.0,,,,19211.0


In [6]:
final_trips = []
for periods in time_periods:
    tp_df = all_trips[all_trips['Period'] == periods.lower()]
    tod_skims_df =  tod_skims[['orig', 'dest', periods.lower()]]
    tod_skims_df = tod_skims_df.dropna()
    tod_skims_df.columns = ['orig_taz', 'dest_taz', 'perc_tt']
    tp_df = pd.merge(tp_df, tod_skims_df, on = ['orig_taz', 'dest_taz'], how='inner')
    final_trips.append(tp_df)

In [7]:
final_trips = pd.concat(final_trips)

In [8]:
final_trips[1:5]

Unnamed: 0,hh_id,person_id,inbound,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,trip_type,trips,...,orig_county,dest_rdm_zones,dest_super_dist,dest_county,home_zone,pp_share,link21_trip_purp,Period,Mode,perc_tt
1,2085629,4850565.0,0,299,274,9,4,1.0,INM,1.0,...,1,San Francisco_37,3,1,325,0.0,work,am,Walk,5365.0
2,849904,2054650.0,0,299,274,9,1,1.0,INM,1.0,...,1,San Francisco_37,3,1,1582,0.0,work,am,Auto_SOV,5365.0
3,2227262,5202541.0,1,299,274,7,3,1.0,INM,1.0,...,1,San Francisco_37,3,1,274,0.0,escort,am,Auto_3+Person,5365.0
4,2227392,5202935.0,1,299,274,7,4,1.0,INM,1.0,...,1,San Francisco_37,3,1,274,0.0,othmaint,am,Walk,5365.0


In [9]:
final_trips['perc_tt'] = final_trips['perc_tt']/100

In [10]:
summary_cols = params['description_a1.1']
summary_cols

{'perc_tt': ['perceived total travel time',
  'A1.1',
  'minutes',
  'perceived travel travel time']}

In [11]:
# summarise for prioirty population
summary_cols = params['description_a1.1']

#time_periods = ['AM']


pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []

for columns in summary_cols:

    for period in time_periods:

        df_temp = final_trips[final_trips['Period'] == period.lower()]
        
        #regional value
        region_value = df_temp[columns].mean()
        reg_df_temp = pd.DataFrame({'Population': 'Whole Population',
                              'Period': period,
                              'Value': region_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : '',
                              'Concept_ID': concept_id,
                              'Geography' : 'Regional',
                              'Purpose' : '',
                              'Metric_ID':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.1',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        reg_df.append(reg_df_temp)
        
        # transbay region
        tb_value = df_temp[df_temp['transbay_od']==1]
        tb_value = tb_value[columns].mean()

        tb_df_temp = pd.DataFrame({'Concept_ID': concept_id,
                                   'Population': 'Whole Population',
                                   'Period': period,
                                   'Value': tb_value,
                                   'Orig_zone' : '',
                                   'Dest_zone' : '',
                                   'Zone_ID' : '',
                                   'Purpose' : '',
                                   'Geography' : 'Transbay',
                                   'Total_Increment': '',
                                   'Metric_ID':summary_cols[columns][1],
                                   'Submetric':summary_cols[columns][1]+'.2',
                                   'Description' : summary_cols[columns][0],
                                   'Units' : summary_cols[columns][2],
                                   'Metric_name' : summary_cols[columns][3]}, index=[0])

        tb_df.append(tb_df_temp)         
        
        #county 
        county_df_temp = df_temp.groupby(['orig_county', 'dest_county'])[columns].mean().reset_index()
        county_df_temp = rename_columns(county_df_temp, ['orig_county', 'dest_county', columns])

        county_df_temp['Period'] = period
        county_df_temp['Population'] = 'Whole Population'
        county_df_temp['Zone_Id'] = ''
        county_df_temp['Concept_ID'] = concept_id
        county_df_temp['Geography'] = 'County'
        county_df_temp['Purpose'] = ''
        county_df_temp['Total_Increment'] = ''
        county_df_temp['Description'] = summary_cols[columns][0]
        county_df_temp['Metric_ID'] = summary_cols[columns][1]
        county_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
        county_df_temp['Units'] = summary_cols[columns][2]
        county_df_temp['Metric_name'] = summary_cols[columns][3]
        county_df.append(county_df_temp)


        #RDM Zones
        rdm_df_temp = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones'])[columns].mean().reset_index()
        rdm_df_temp =  rename_columns(rdm_df_temp, ['orig_rdm_zones', 'dest_rdm_zones', columns])

        rdm_df_temp['Period'] = period
        rdm_df_temp['Population'] = 'Whole Population'
        rdm_df_temp['Zone_ID'] = ''
        rdm_df_temp['Geography'] = 'RDM'
        rdm_df_temp['Concept_ID'] = concept_id
        rdm_df_temp['Purpose'] = ''
        rdm_df_temp['Total_Increment'] = ''
        rdm_df_temp['Description'] = summary_cols[columns][0]     
        rdm_df_temp['Metric_ID'] = summary_cols[columns][1]
        rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
        rdm_df_temp['Units'] = summary_cols[columns][2]
        rdm_df_temp['Metric_name'] = summary_cols[columns][3]
        rdm_df.append(rdm_df_temp)
        
        #super district
        sd_df_temp = df_temp.groupby(['orig_super_dist', 'dest_super_dist'])[columns].mean().reset_index()
        sd_df_temp = rename_columns(sd_df_temp, ['orig_super_dist', 'dest_super_dist', columns])

        sd_df_temp['Period'] = period
        sd_df_temp['Population'] = 'Whole Population'
        sd_df_temp['Zone_ID'] = ''
        sd_df_temp['Concept_ID'] = concept_id
        sd_df_temp['Purpose'] = ''
        sd_df_temp['Total_Increment'] = ''
        sd_df_temp['Geography'] = 'Superdistrict'
        sd_df_temp['Description'] = summary_cols[columns][0]
        sd_df_temp['Metric_ID'] = summary_cols[columns][1]
        sd_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
        sd_df_temp['Units'] = summary_cols[columns][2]
        sd_df_temp['Metric_name'] = summary_cols[columns][3]
        sd_df.append(sd_df_temp)
        
        #prioirty population
        df_temp['pp_share'] = df_temp['pp_share']/100
        pp_value = weighted_average(df_temp, columns, 'pp_share')
        pp_df_temp = pd.DataFrame({'Population': 'Prioirty Population',
                              'Period': period,
                              'Value': pp_value,
                              'Origin_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : '',
                              'Purpose' : '',
                              'Concept_ID': concept_id,
                              'Geography' : 'Regional',
                              'Total_Increment': '',
                              'Metric_ID':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.6',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        pp_df.append(pp_df_temp)

        
pp_df = pd.concat(pp_df)
reg_df = pd.concat(reg_df)
county_df = pd.concat(county_df)
sd_df = pd.concat(sd_df)
rdm_df = pd.concat(rdm_df)
tb_df = pd.concat(tb_df)

In [12]:
all_dfs = [reg_df, tb_df, county_df, rdm_df, sd_df, pp_df]

for dfs in all_dfs:
    metric_name = 'perceived_travel_time_'
    dfs = dfs.reset_index(drop=True)
    dfs = dfs[perf_measure_columns]
    file_name = dfs['Submetric'][0]
    dfs.to_csv(_join(summary_dir, metric_name + file_name + '.csv'), index=None)
    print(len(dfs), file_name, dfs['Metric_name'][0])
    
combined_df = pd.concat([reg_df, tb_df, county_df, rdm_df, sd_df, pp_df]).reset_index(drop=True)
combined_df.to_csv(_join(summary_dir, 'perceived_travel_time_' + 'A1.1' + '.csv'), index=None)

KeyError: "['Origin_zone', 'Total_Increment'] not in index"

In [None]:
county_df