In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

preprocess_dir = _join(ctramp_dir, '_pre_process_files')
summary_dir = _join(params['summary_dir'], 'revised_A1')
Path(summary_dir).mkdir(parents=True, exist_ok=True)

filename_extension = params['filename_extension']
perf_measure_columns = params['final_columns']

In [3]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
#person_file = _join(ctramp_dir, 'main\\personData_' + str(iteration) + '.csv')
#household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

#person = pd.read_csv(person_file)

#hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
#hh = hh.rename(columns = {'taz': 'home_zone'})

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
#transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

In [4]:
#df_trips = create_trip_roster(ctramp_dir, transbay_od, geo_cwks, link21_purp_mapping)
#df_trn = df_trips.loc[df_trips['trip_mode'].isin([6,7,8])]

#df_trn['Period'] = df_trn['depart_hour'].map(time_period_mapping)
#df_trn['Mode'] = df_trn['trip_mode'].map(mode_cat_mapping)

In [5]:
all_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster.parquet'))
#all_trips = pd.read_parquet(_join(preprocess_dir, 'trip_roster_2050_Baseline_R2_Run4.parquet'))
df_trn = all_trips.loc[all_trips['trip_mode'].isin([6,7,8])]

In [6]:
#df_trn

In [7]:
%%time
#time_periods = ['AM']
df_temp = []

for period in time_periods:
    print(f'processing - {period}')
    
    df_trn_pd = df_trn[df_trn['Period'] == period.lower()]
    df_trn_wlk = df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT']
    df_trn_pnr = df_trn_pd[df_trn_pd['Mode'] == 'PNR_TRANSIT']
    df_trn_knr = df_trn_pd[df_trn_pd['Mode'] == 'KNR_TRANSIT']
    
    #for acc_egg in acc_egg_modes:
    #for Walk transit Walk
    df_skm_wk = pd.read_parquet(_join(preprocess_dir, period.lower() +'_WLK_TRN_WLK' + '_cores.parquet'))
    
    
    df_skm_wk = df_skm_wk.loc[df_skm_wk['ivt']>0]


    df_wlk = pd.merge(df_trn_wlk, df_skm_wk, 
                          left_on =['orig_taz', 'dest_taz'], 
                          right_on=['orig', 'dest'], 
                          how ='inner')

    # PNR Transit
    df_skm_pnr_ib = pd.read_parquet(_join(preprocess_dir, period.lower() +'_WLK_TRN_PNR' + '_cores.parquet'))
    df_skm_pnr_ib = df_skm_pnr_ib[df_skm_pnr_ib['ivt']>0]

    df_skm_pnr_ob = pd.read_parquet(_join(preprocess_dir, period.lower() +'_PNR_TRN_WLK' + '_cores.parquet')) 
    df_skm_pnr_ob = df_skm_pnr_ob[df_skm_pnr_ob['ivt']>0]

    df_trn_pnr_ib = df_trn_pnr[df_trn_pnr['inbound'] == 1] # returning home
    df_trn_pnr_ib = pd.merge(df_trn_pnr_ib, df_skm_pnr_ib, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_pnr_ob = df_trn_pnr[df_trn_pnr['inbound'] != 1] # returning home
    df_trn_pnr_ob = pd.merge(df_trn_pnr_ob, df_skm_pnr_ob, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_pnr = pd.concat([df_trn_pnr_ib, df_trn_pnr_ob], ignore_index=True)

        # KNR Transit
    df_skm_knr_ib = pd.read_parquet(_join(preprocess_dir, period.lower() +'_WLK_TRN_KNR' + '_cores.parquet'))
    df_skm_knr_ib = df_skm_knr_ib[df_skm_knr_ib['ivt']>0]

    df_skm_knr_ob = pd.read_parquet(_join(preprocess_dir, period.lower() +'_KNR_TRN_WLK' + '_cores.parquet')) 
    df_skm_knr_ob = df_skm_knr_ob[df_skm_knr_ob['ivt']>0]

    df_trn_knr_ib = df_trn_knr[df_trn_knr['inbound'] == 1] # returning home
    df_trn_knr_ib = pd.merge(df_trn_knr_ib, df_skm_knr_ib, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_knr_ob = df_trn_knr[df_trn_knr['inbound'] != 1] # returning home
    df_trn_knr_ob = pd.merge(df_trn_knr_ob, df_skm_knr_ob, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_knr = pd.concat([df_trn_knr_ib, df_trn_knr_ob], ignore_index=True)

    df_trn_rail = pd.concat([df_wlk, df_pnr, df_knr], ignore_index=True)
    df_temp.append(df_trn_rail)
    

df_trn_rail = pd.concat(df_temp)

processing - am
processing - md
processing - pm
processing - ev
processing - ea
Wall time: 1min 19s


In [8]:
df_trn_rail

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,...,Mode,orig,dest,ivt,wacc,wait,wegr,dtime,xwait,trip_time
0,1777399,4318129.0,0,Work,atwork,136,10,9,6,1.0,...,WALK_TRANSIT,136,10,548.0,1206.0,80.0,1549.0,0.0,0.0,3383.0
1,1870289,4478513.0,1,escort,Home,136,10,8,6,1.0,...,WALK_TRANSIT,136,10,548.0,1206.0,80.0,1549.0,0.0,0.0,3383.0
2,1839171,4424217.0,0,eatout,social,136,10,9,6,1.0,...,WALK_TRANSIT,136,10,548.0,1206.0,80.0,1549.0,0.0,0.0,3383.0
3,1715705,4204192.0,0,Home,othmaint,136,10,7,6,1.0,...,WALK_TRANSIT,136,10,548.0,1206.0,80.0,1549.0,0.0,0.0,3383.0
4,1714331,4201741.0,0,Home,work,136,10,8,6,1.0,...,WALK_TRANSIT,136,10,548.0,1206.0,80.0,1549.0,0.0,0.0,3383.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63438,1612850,3969170.0,0,Home,work,3328,333,5,8,1.0,...,KNR_TRANSIT,3328,333,1100.0,0.0,130.0,2478.0,7174.0,0.0,10882.0
63439,1615089,3973315.0,0,Home,work,3330,2226,5,8,1.0,...,KNR_TRANSIT,3330,2226,2206.0,0.0,300.0,727.0,5052.0,0.0,8285.0
63440,1616438,3976062.0,0,Home,work,3332,613,5,8,1.0,...,KNR_TRANSIT,3332,613,100.0,0.0,125.0,462.0,8155.0,0.0,8842.0
63441,3469127,,0,Home,othmaint,1224,1046,5,8,1.0,...,KNR_TRANSIT,1224,1046,1156.0,0.0,500.0,2885.0,496.0,0.0,5037.0


In [9]:
df_trn_rail.columns

Index(['hh_id', 'person_id', 'inbound', 'orig_purpose', 'dest_purpose',
       'orig_taz', 'dest_taz', 'depart_hour', 'trip_mode', 'sampleRate',
       'trip_type', 'trips', 'transbay_od', 'orig_rdm_zones',
       'orig_super_dist', 'orig_county', 'dest_rdm_zones', 'dest_super_dist',
       'dest_county', 'home_zone', 'income', 'Income', 'pp_share',
       'link21_trip_purp', 'Period', 'Mode', 'orig', 'dest', 'ivt', 'wacc',
       'wait', 'wegr', 'dtime', 'xwait', 'trip_time'],
      dtype='object')

# summarise for prioirty population
summary_cols = params['description_a1.2']

time_periods = ['AM']


pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []


for columns in summary_cols:
    
    for period in time_periods:
    
        df_temp = df_trn_rail[(df_trn_rail[columns] > 0) & (df_trn_rail['Period'] == period.lower())]
        
        #regional value
        region_value = df_temp[columns].mean()
        reg_df_temp = pd.DataFrame({'Population': 'Whole Population',
                              'Period': period,
                              'Value': region_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.1',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        reg_df.append(reg_df_temp)
        
        # transbay region
        tb_value = df_temp[df_temp['transbay_od']==1]
        tb_value = tb_value[columns].mean()
        
        tb_df_temp = pd.DataFrame({'Population': 'Whole Population',
                                   'Period': period,
                                   'Value': tb_value,
                                   'Orig_zone' : '',
                                   'Dest_zone' : '',
                                   'Zone_ID' : 'Megaregion',
                                   'Geography' : 'Transbay',
                                   'Metric':summary_cols[columns][1],
                                   'Submetric':summary_cols[columns][1]+'.6',
                                   'Description' : summary_cols[columns][0],
                                   'Units' : summary_cols[columns][2],
                                   'Metric_name' : summary_cols[columns][3]}, index=[0])
                                    
        tb_df.append(tb_df_temp)  
                
        #county 
        county_df_temp = df_temp.groupby(['orig_county', 'dest_county'])[columns].mean().reset_index()
        county_df_temp = rename_columns(county_df_temp, ['orig_county', 'dest_county', columns])
        
        county_df_temp['Period'] = period
        county_df_temp['Population'] = 'Whole Population'
        county_df_temp['Zone_Id'] = ''
        county_df_temp['Geography'] = 'County'
        county_df_temp['Description'] = summary_cols[columns][0]
        county_df_temp['Metric'] = summary_cols[columns][1]
        county_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
        county_df_temp['Units'] = summary_cols[columns][2]
        county_df_temp['Metric_name'] = summary_cols[columns][3]
        county_df.append(county_df_temp)
        
        #RDM Zones
        rdm_df_temp = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones'])[columns].mean().reset_index()
        rdm_df_temp =  rename_columns(rdm_df_temp, ['orig_rdm_zones', 'dest_rdm_zones', columns])
        
        rdm_df_temp['Period'] = period
        rdm_df_temp['Population'] = 'Whole Population'
        rdm_df_temp['Zone_ID'] = ''
        rdm_df_temp['Geography'] = 'RDM'
        rdm_df_temp['Description'] = summary_cols[columns][0]     
        rdm_df_temp['Metric'] = summary_cols[columns][1]
        rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
        rdm_df_temp['Units'] = summary_cols[columns][2]
        rdm_df_temp['Metric_name'] = summary_cols[columns][3]
        rdm_df.append(rdm_df_temp)
        
        #super district
        sd_df_temp = df_temp.groupby(['orig_super_dist', 'dest_super_dist'])[columns].mean().reset_index()
        sd_df_temp = rename_columns(sd_df_temp, ['orig_super_dist', 'dest_super_dist', columns])
        
        sd_df_temp['Period'] = period
        sd_df_temp['Population'] = 'Whole Population'
        sd_df_temp['Zone_ID'] = ''
        sd_df_temp['Geography'] = 'Superdistrict'
        sd_df_temp['Description'] = summary_cols[columns][0]
        sd_df_temp['Metric'] = summary_cols[columns][1]
        sd_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
        sd_df_temp['Units'] = summary_cols[columns][2]
        sd_df_temp['Metric_name'] = summary_cols[columns][3]
        sd_df.append(sd_df_temp)
                
        #prioirty population
        df_temp['pp_share'] = df_temp['pp_share']/100
        pp_value = weighted_average(df_temp, 'ivt', 'pp_share')
        pp_df_temp = pd.DataFrame({'Population': 'Prioirty Population',
                              'Period': period,
                              'Value': pp_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.2',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        pp_df.append(pp_df_temp)
    
    
pp_df = pd.concat(pp_df)
reg_df = pd.concat(reg_df)
county_df = pd.concat(county_df)
sd_df = pd.concat(sd_df)
rdm_df = pd.concat(rdm_df)
tb_df = pd.concat(tb_df)

In [10]:
summary_cols = params['description_a1.2']
summary_cols

{'ivt': ['in-vehicle travel time',
  'A1.2',
  'minutes',
  'actual in vehicle travel time'],
 'wait': ['wait time', 'A1.3', 'minutes', 'actual wait time'],
 'wacc': ['walk access time', 'A1.4', 'minutes', 'actual walk access time'],
 'wegr': ['walk egress time', 'A1.5', 'minutes', 'actual walk egress time'],
 'dtime': ['drive time', 'A1.6', 'minutes', 'actual drive time'],
 'xwait': ['transfer time', 'A1.7', 'minutes', 'actual transfer time'],
 'trip_time': ['total trip time', 'A1.8', 'minutes', 'actual total trip time']}

In [11]:
#time_periods = ['AM']


pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []


for columns in summary_cols:
    
    #for period in time_periods:
    print(columns)
    df_temp = df_trn_rail[(df_trn_rail[columns] > 0)]
    df_temp[columns+'_trips'] = (df_temp[columns] * df_temp['trips'])/100

    #regional value
    reg_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column=columns+'_trips')
    reg_df_temp1 = reg_df_temp1.rename(columns={'Value': columns+'_trips'})
    
    reg_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column='trips')
    reg_df_temp2 = reg_df_temp2.rename(columns={'Value': 'trips'})
    
    #reg_df_temp1 = df_temp.groupby(['Period'])[columns+'_trips'].sum().reset_index()
    #reg_df_temp2 = df_temp.groupby(['Period'])['trips'].sum().reset_index()
    reg_df_temp = pd.merge(reg_df_temp1, reg_df_temp2, on=['Period', 'Income'], how='left')
    reg_df_temp[columns] = reg_df_temp[columns+'_trips']/reg_df_temp['trips']
    reg_df_temp = reg_df_temp[['Period', 'Income', columns]]
    reg_df_temp = reg_df_temp.rename(columns={columns:'Value'})
    reg_df_temp['Population'] = 'Whole Population'
    reg_df_temp['Origin_zone'] = ''
    reg_df_temp['Dest_zone'] = ''
    reg_df_temp['Zone_ID'] = ''
    reg_df_temp['Concept_ID'] = concept_id
    reg_df_temp['Geography'] = 'Regional'
    reg_df_temp['Purpose'] = ''
    reg_df_temp['Mode'] = ''
    reg_df_temp['Total_Increment'] = ''
    reg_df_temp['Metric_ID'] = summary_cols[columns][1]
    reg_df_temp['Submetric'] = summary_cols[columns][1]+'.1'
    reg_df_temp['Description'] = 'Average ' + summary_cols[columns][0]+ ' in the region'
    reg_df_temp['Units'] = summary_cols[columns][2]
    reg_df_temp['Metric_name'] = summary_cols[columns][3]
    #reg_df.append(reg_df_temp)

    # transbay region
    tb_df_t = df_temp[df_temp['transbay_od']==1]
    tb_df_temp = tb_df_t.copy()
    tb_df_temp1 = summarize_all_combinations(tb_df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column=columns+'_trips')
    tb_df_temp1 = tb_df_temp1.rename(columns={'Value': columns+'_trips'})
    
    tb_df_temp2 = summarize_all_combinations(tb_df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column='trips')
    tb_df_temp2 = tb_df_temp2.rename(columns={'Value': 'trips'})
    
    #tb_df_temp1 = tb_df_temp.groupby(['Period'])[columns+'_trips'].sum().reset_index()
    #tb_df_temp2 = tb_df_temp.groupby(['Period'])['trips'].sum().reset_index()
    tb_df_temp = pd.merge(tb_df_temp1, tb_df_temp2, on=['Period', 'Income'], how='left')
    tb_df_temp[columns] = tb_df_temp[columns+'_trips']/tb_df_temp['trips']
    tb_df_temp = tb_df_temp[['Period', 'Income', columns]]
    tb_df_temp = tb_df_temp.rename(columns={columns:'Value'})

    tb_df_temp['Concept_ID'] = concept_id
    tb_df_temp['Population'] = 'Whole Population'
    tb_df_temp['Origin_zone'] = ''
    tb_df_temp['Dest_zone'] = ''
    tb_df_temp['Zone_ID'] = ''
    tb_df_temp['Purpose'] = ''
    tb_df_temp['Mode'] = ''
    tb_df_temp['Geography'] = 'Transbay'
    tb_df_temp['Total_Increment'] = ''
    tb_df_temp['Metric_ID'] = summary_cols[columns][1]
    tb_df_temp['Submetric'] = summary_cols[columns][1]+'.2'
    tb_df_temp['Description'] = 'Average' + summary_cols[columns][0] + ' in the transbay region'
    tb_df_temp['Units'] = summary_cols[columns][2]
    tb_df_temp['Metric_name'] = summary_cols[columns][3]

    #tb_df.append(tb_df_temp)         

    #county 
    #county_df_temp1 = df_temp.groupby(['orig_county', 'dest_county', 'Period'])[columns+'_trips'].sum().reset_index()
    #county_df_temp2 = df_temp.groupby(['orig_county', 'dest_county', 'Period'])['trips'].sum().reset_index()
    county_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['orig_county', 'dest_county', 'Period', 'Income'], 
                                                  summary_column=columns+'_trips')
    county_df_temp1 = county_df_temp1.rename(columns={'Value': columns+'_trips'})
    
    county_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['orig_county', 'dest_county', 'Period', 'Income'], 
                                                  summary_column='trips')
    county_df_temp2 = county_df_temp2.rename(columns={'Value': 'trips'})
    
    county_df_temp = pd.merge(county_df_temp1, county_df_temp2, on=['orig_county', 'dest_county', 'Period', 'Income'], how='left')
    county_df_temp[columns] = county_df_temp[columns+'_trips']/county_df_temp['trips']
    county_df_temp = county_df_temp[['orig_county', 'dest_county', 'Period', 'Income' ,columns]]
    county_df_temp = county_df_temp.rename(columns={columns:'Value', 
                                                    'orig_county': 'Origin_zone',
                                                    'dest_county': 'Dest_zone'})
    county_df_temp['Population'] = 'Whole Population'
    county_df_temp['Zone_ID'] = ''
    county_df_temp['Concept_ID'] = concept_id
    county_df_temp['Geography'] = 'County'
    county_df_temp['Purpose'] = ''
    county_df_temp['Mode'] = ''
    county_df_temp['Total_Increment'] = ''
    county_df_temp['Description'] = 'Average ' + summary_cols[columns][0] + ' in the origin and destination county'
    county_df_temp['Metric_ID'] = summary_cols[columns][1]
    county_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
    county_df_temp['Units'] = summary_cols[columns][2]
    county_df_temp['Metric_name'] = summary_cols[columns][3]
    #county_df.append(county_df_temp)


    #RDM Zones
    rdm_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 
                                                  summary_column=columns+'_trips')
    rdm_df_temp1 = rdm_df_temp1.rename(columns={'Value': columns+'_trips'})
    
    rdm_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 
                                                  summary_column='trips')
    rdm_df_temp2 = rdm_df_temp2.rename(columns={'Value': 'trips'})
    #rdm_df_temp1 = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones', 'Period'])[columns+'_trips'].sum().reset_index()
    #rdm_df_temp2 = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones', 'Period'])['trips'].sum().reset_index()
    rdm_df_temp = pd.merge(rdm_df_temp1, rdm_df_temp2, on=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], how='left')
    rdm_df_temp[columns] = rdm_df_temp[columns+'_trips']/rdm_df_temp['trips']
    rdm_df_temp = rdm_df_temp[['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income', columns]]
    rdm_df_temp = rdm_df_temp.rename(columns={'orig_rdm_zones': 'Origin_zone', 
                                              'dest_rdm_zones': 'Dest_zone', 
                                              columns: 'Value'})

    rdm_df_temp['Population'] = 'Whole Population'
    rdm_df_temp['Zone_ID'] = ''
    rdm_df_temp['Geography'] = 'RDM'
    rdm_df_temp['Concept_ID'] = concept_id
    rdm_df_temp['Purpose'] = ''
    rdm_df_temp['Mode'] = ''
    rdm_df_temp['Total_Increment'] = ''
    rdm_df_temp['Description'] = summary_cols[columns][0] + ' in the origin and destination RDM zones'  
    rdm_df_temp['Metric_ID'] = summary_cols[columns][1]
    rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
    rdm_df_temp['Units'] = summary_cols[columns][2]
    rdm_df_temp['Metric_name'] = summary_cols[columns][3]
    #rdm_df.append(rdm_df_temp)

        #super district   
    sd_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['orig_super_dist', 'dest_super_dist', 'Period', 'Income'], 
                                                  summary_column=columns+'_trips')
    sd_df_temp1 = sd_df_temp1.rename(columns={'Value': columns+'_trips'})
    
    sd_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['orig_super_dist', 'dest_super_dist', 'Period', 'Income'], 
                                                  summary_column='trips')
    sd_df_temp2 = sd_df_temp2.rename(columns={'Value': 'trips'})
    #sd_df_temp1 = df_temp.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])[columns+'_trips'].sum().reset_index()
    #sd_df_temp2 = df_temp.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['trips'].sum().reset_index()
    
    sd_df_temp = pd.merge(sd_df_temp1, sd_df_temp2, on=['orig_super_dist', 'dest_super_dist', 'Period', 'Income'], how='left')
    sd_df_temp[columns] = sd_df_temp[columns+'_trips']/sd_df_temp['trips']
    sd_df_temp = sd_df_temp[['orig_super_dist', 'dest_super_dist', 'Period', 'Income', columns]]
    sd_df_temp = sd_df_temp.rename(columns={'orig_super_dist': 'Origin_zone', 
                                            'dest_super_dist': 'Dest_zone',
                                             columns: 'Value'})
    sd_df_temp['Population'] = 'Whole Population'
    sd_df_temp['Zone_ID'] = ''
    sd_df_temp['Concept_ID'] = concept_id
    sd_df_temp['Purpose'] = ''
    sd_df_temp['Mode'] = ''
    sd_df_temp['Total_Increment'] = ''
    sd_df_temp['Geography'] = 'Superdistrict'
    sd_df_temp['Description'] = summary_cols[columns][0] + ' in the origin and destination Super district'  
    sd_df_temp['Metric_ID'] = summary_cols[columns][1]
    sd_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
    sd_df_temp['Units'] = summary_cols[columns][2]
    sd_df_temp['Metric_name'] = summary_cols[columns][3]
    #sd_df.append(sd_df_temp)

        #prioirty population
    df_temp['pp_share'] = df_temp['pp_share']/100
    df_temp['pp_trips'] = df_temp['pp_share'] * df_temp['trips']
    df_temp['pp_'+columns] = df_temp['pp_trips'] * df_temp[columns] / 100
    
    pp_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column='pp_'+columns)
    pp_df_temp1 = pp_df_temp1.rename(columns={'Value': 'pp_'+columns})
    
    pp_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['Period', 'Income'], 
                                                  summary_column='pp_trips')
    pp_df_temp2 = pp_df_temp2.rename(columns={'Value': 'pp_trips'})

    #pp_df_temp1 = df_temp.groupby(['Period'])['pp_'+columns].sum().reset_index()
    #pp_df_temp2 = df_temp.groupby(['Period'])['pp_trips'].sum().reset_index()
    pp_df_temp = pd.merge(pp_df_temp1, pp_df_temp2, on=['Period', 'Income'], how='left')
    pp_df_temp[columns] = pp_df_temp['pp_'+columns]/pp_df_temp['pp_trips']
    pp_df_temp = pp_df_temp[['Period', 'Income' ,columns]]
    pp_df_temp = pp_df_temp.rename(columns={columns:'Value'})

    pp_df_temp['Population'] = 'Prioirty Population'
    pp_df_temp['Origin_zone'] = ''
    pp_df_temp['Dest_zone'] = ''
    pp_df_temp['Zone_ID'] = ''
    pp_df_temp['Purpose'] = ''
    pp_df_temp['Mode'] = ''
    pp_df_temp['Concept_ID'] = concept_id
    pp_df_temp['Geography'] = 'Region'
    pp_df_temp['Total_Increment'] = ''
    pp_df_temp['Metric_ID'] = summary_cols[columns][1]
    pp_df_temp['Submetric'] = summary_cols[columns][1]+'.6'
    pp_df_temp['Description'] = summary_cols[columns][0] + ' in the region'
    pp_df_temp['Units'] = summary_cols[columns][2]
    pp_df_temp['Metric_name'] = summary_cols[columns][3]
    #pp_df.append(pp_df_temp)
    
    pp_rdm_df_temp1 = summarize_all_combinations(df_temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 
                                                  summary_column='pp_'+columns)
    pp_rdm_df_temp1 = pp_rdm_df_temp1.rename(columns={'Value': 'pp_'+columns})
    
    pp_rdm_df_temp2 = summarize_all_combinations(df_temp, groupby_columns=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 
                                                  summary_column='pp_trips')
    pp_rdm_df_temp2 = pp_rdm_df_temp2.rename(columns={'Value': 'pp_trips'})
    
    pp_rdm_df_temp = pd.merge(pp_rdm_df_temp1, pp_rdm_df_temp2, on=['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], how='left')
    pp_rdm_df_temp[columns] = pp_rdm_df_temp['pp_'+columns]/pp_rdm_df_temp['pp_trips']
    pp_rdm_df_temp = pp_rdm_df_temp[['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income', columns]]
    pp_rdm_df_temp = pp_rdm_df_temp.rename(columns={columns:'Value',
                                                    'orig_rdm_zones': 'Origin_zone', 
                                                    'dest_rdm_zones': 'Dest_zone'})
    
    pp_rdm_df_temp['Population'] = 'Prioirty Population'
    #pp_rdm_df_temp['Origin_zone'] = ''
    #pp_rdm_df_temp['Dest_zone'] = ''
    pp_rdm_df_temp['Zone_ID'] = ''
    pp_rdm_df_temp['Purpose'] = ''
    pp_rdm_df_temp['Mode'] = ''
    pp_rdm_df_temp['Concept_ID'] = concept_id
    pp_rdm_df_temp['Geography'] = 'RDM'
    pp_rdm_df_temp['Total_Increment'] = ''
    pp_rdm_df_temp['Metric_ID'] = summary_cols[columns][1]
    pp_rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.7'
    pp_rdm_df_temp['Description'] = 'Average ' + summary_cols[columns][0] + ' in the region'
    pp_rdm_df_temp['Units'] = summary_cols[columns][2]
    pp_rdm_df_temp['Metric_name'] = summary_cols[columns][3]
    #pp_df_rdm.append(pp_rdm_df_temp)
    
    all_dfs = [reg_df_temp, tb_df_temp, county_df_temp, rdm_df_temp, sd_df_temp, pp_df_temp, pp_rdm_df_temp]

    for dfs in all_dfs:
        metric_name = '_' + summary_cols[columns][3].replace(' ', '_') + '_'
        dfs = dfs.reset_index(drop=True)
        dfs = dfs[perf_measure_columns]
        file_name = dfs['Submetric'][0]
        geography = '_' + dfs['Geography'][0].replace(' ', '_')
        dfs.to_csv(_join(summary_dir, file_name + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
        print(len(dfs), file_name, dfs['Metric_name'][0])

    combined_df = pd.concat([reg_df_temp, tb_df_temp, county_df_temp, rdm_df_temp, df_temp, pp_df_temp]).reset_index(drop=True)
    combined_df.to_csv(_join(summary_dir, summary_cols[columns][1] + metric_name + concept_id + '_region' +filename_extension+'.csv'), index=None)
       
#pp_df = pd.concat(pp_df)
#reg_df = pd.concat(reg_df)
#county_df = pd.concat(county_df)
#sd_df = pd.concat(sd_df)
#rdm_df = pd.concat(rdm_df)
#tb_df = pd.concat(tb_df)

ivt
30 A1.2.1 actual in vehicle travel time
30 A1.2.2 actual in vehicle travel time
2391 A1.2.3 actual in vehicle travel time
625346 A1.2.4 actual in vehicle travel time
23747 A1.2.5 actual in vehicle travel time
30 A1.2.6 actual in vehicle travel time
625346 A1.2.7 actual in vehicle travel time
wait
30 A1.3.1 actual wait time
30 A1.3.2 actual wait time
2391 A1.3.3 actual wait time
625346 A1.3.4 actual wait time
23747 A1.3.5 actual wait time
30 A1.3.6 actual wait time
625346 A1.3.7 actual wait time
wacc
30 A1.4.1 actual walk access time
30 A1.4.2 actual walk access time
2170 A1.4.3 actual walk access time
503342 A1.4.4 actual walk access time
20982 A1.4.5 actual walk access time
30 A1.4.6 actual walk access time
503342 A1.4.7 actual walk access time
wegr
30 A1.5.1 actual walk egress time
30 A1.5.2 actual walk egress time
2303 A1.5.3 actual walk egress time
523090 A1.5.4 actual walk egress time
21872 A1.5.5 actual walk egress time
30 A1.5.6 actual walk egress time
523090 A1.5.7 actual w

all_dfs = pd.concat([reg_df, tb_df, county_df, rdm_df, sd_df, pp_df], ignore_index=True).reset_index(drop=True)

unique_metric_ids = all_dfs['Metric_ID'].unique()

for mids in unique_metric_ids:
    print(mids)
    dfs = all_dfs.loc[all_dfs['Metric_ID']==mids]
    usbmi = dfs['Submetric'].unique()
    
    for mi in usbmi:
        print(mi)
        dfs_temp = dfs.loc[dfs['Submetric'] == mi]
        dfs_temp = dfs_temp[perf_measure_columns]
        dfs_temp = dfs_temp.reset_index(drop=True)
        file_name = mi 
        metric_name = dfs_temp['Metric_name'][0].replace(' ', '_')
        geography = '_' + dfs['Geography'][0].replace(' ', '_')
        dfs_temp.to_csv(_join(summary_dir, file_name + '_' + metric_name + concept_id + geography + filename_extension + '.csv'), index=None)
        print(len(dfs_temp), file_name, dfs_temp['Metric_name'][0], dfs_temp['Value'].sum())
    
    dfs = dfs.reset_index(drop=True)
    file_name = mids 
    metric_name = dfs['Metric_name'][0]
    dfs.to_csv(_join(summary_dir, file_name + metric_name + '_' + concept_id + '_region' + filename_extension +  '.csv'), index=None)
    print(len(dfs), file_name, dfs['Metric_name'][0], dfs['Value'].sum())
    
    #'A3.1' + '_weekday_linked_trips_' + concept_id + '_region' +filename_extension+'.csv'
    
    #metric_name = dfs['Metric_name'][0]
    #
    #
    #dfs.to_csv(_join(summary_dir, metric_name + file_name + filename_extension +  '.csv'), index=None)
    #print(len(dfs), file_name, dfs['Metric_name'][0], dfs['Value'].sum())

## B2.1