In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

In [3]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
person_file = _join(ctramp_dir, 'main\\personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
hh = hh.rename(columns = {'taz': 'home_zone'})

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

In [4]:
transbay_od

Unnamed: 0,transbay_o,transbay_d,transbay_od
0,9,1,1
1,1530,1,1
2,1531,1,1
3,1532,1,1
4,1533,1,1
...,...,...,...
3320390,1752,3353,1
3320391,1757,3353,1
3320392,1779,3353,1
3320393,1802,3353,1


In [5]:
def skim_core_to_df(skim, core, cols =['orig', 'dest', 'rail_od']):
    skim_df = pd.DataFrame(skim[core])
    skim_df = pd.melt(skim_df.reset_index(), id_vars='index', value_vars=skim_df.columns)
    skim_df['index'] = skim_df['index'] + 1
    skim_df['variable'] = skim_df['variable'] + 1
    skim_df.columns = cols

    return skim_df

def array2df(array, cols =['orig', 'dest', 'rail_od']):
    df = pd.DataFrame(array)
    df = pd.melt(df.reset_index(), id_vars='index', value_vars=df.columns)
    df['index'] = df['index'] + 1
    df['variable'] = df['variable'] + 1
    df.columns = cols
    
    return df

def weighted_average(df, value, weight):
    val = df[value]
    wt = df[weight]
    return (val * wt).sum() / wt.sum()

def rename_columns(df, old_cols, new_cols=['Orig_zone', 'Dest_zone', 'Value']):
    """
    Renames columns of a pandas dataframe using lists of old and new column names.

    Args:
    df (pandas.DataFrame): The dataframe to rename columns in.
    old_cols (list): A list of old column names.
    new_cols (list): A list of new column names.

    Returns:
    pandas.DataFrame: The renamed dataframe.
    """

    # Use pandas' rename method to rename columns
    df = df.rename(columns=dict(zip(old_cols, new_cols)))

    return df

In [6]:
#trip roster
def create_trip_roster(ctramp_dir, transbay_od, geo_cwks, link21_purp_mapping):
    
    
    ind_trip = pd.read_csv(_join(ctramp_dir, 'main\\indivTripData_' + str(iteration) + '.csv'))
    jnt_trip = pd.read_csv(_join(ctramp_dir, 'main\\jointTripData_' + str(iteration) + '.csv'))
    
    jnt_trip['tours'] = 'joint'
    ind_trip['tours'] = 'inm'
    
    ind_drop_columns = ['avAvailable', 'sampleRate', 'taxiWait', 'singleTNCWait', 
                    'sharedTNCWait', 'orig_walk_segment', 'dest_walk_segment',
                    'person_id', 'person_num', 'parking_taz']

    jnt_drop_columns = ['avAvailable', 'sampleRate', 'taxiWait', 'singleTNCWait', 
                    'sharedTNCWait', 'orig_walk_segment', 'dest_walk_segment',
                'parking_taz', 'num_participants']

    ind_trip = ind_trip.drop(columns = ind_drop_columns)
    jnt_trip = jnt_trip.drop(columns = jnt_drop_columns)

    out_tripdata = pd.concat([ind_trip, jnt_trip])
    
    # add transbay_od to final tours
    out_tripdata = pd.merge(out_tripdata, transbay_od, left_on= ['orig_taz', 'dest_taz'], right_on = ['transbay_o', 'transbay_d'], how = 'left')
    out_tripdata['transbay_od'] = out_tripdata['transbay_od'].fillna(0)

    out_tripdata = out_tripdata.drop(columns = ['transbay_o', 'transbay_d'])
    #print(out_tripdata['transbay_od'].value_counts())

    # add geographies to final tours
    out_tripdata = pd.merge(out_tripdata, geo_cwks, left_on = ['orig_taz'], right_on = ['taz'], how = 'left')
    out_tripdata = out_tripdata.rename(columns = {'rdm_zones':'orig_rdm_zones', 
                                                'super_district': 'orig_super_dist',
                                                'county': 'orig_county'})
    del out_tripdata['taz']

    out_tripdata = pd.merge(out_tripdata, geo_cwks, left_on = ['dest_taz'], right_on = ['taz'], how = 'left')
    out_tripdata = out_tripdata.rename(columns = {'rdm_zones':'dest_rdm_zones', 
                                                'super_district': 'dest_super_dist',
                                                'county': 'dest_county'})

    del out_tripdata['taz']

    out_tripdata = pd.merge(out_tripdata, hh, on = 'hh_id', how = 'left')

    # add prioirty population
    out_tripdata = pd.merge(out_tripdata, pp_perc, left_on = ['home_zone'], right_on = ['taz'], how = 'left')
    print("NAs in PP Share:",  out_tripdata['pp_share'].isna().sum())
    # out_tourdata['pp_share'] = out_tourdata['pp_share'].fillna(0)
    del out_tripdata['taz']
    
    #add link21 purpose definitions
    df = out_tripdata.copy()
    df['new_dest_purp'] = df['dest_purpose']
    df['new_orig_purp'] = df['orig_purpose']
    
    # changing the purpose categories for atwork purpose
    df.loc[(df['tour_purpose'] == 'atwork_eat') & (df['dest_purpose'] == 'atwork'), 'new_dest_purp'] = 'eatout'
    df.loc[(df['tour_purpose'] == 'atwork_eat') & (df['orig_purpose'] == 'atwork'), 'new_orig_purp'] = 'eatout'

    df.loc[(df['tour_purpose'] == 'atwork_business') & (df['dest_purpose'] == 'atwork'), 'new_dest_purp'] = 'business'
    df.loc[(df['tour_purpose'] == 'atwork_business') & (df['orig_purpose'] == 'atwork'), 'new_orig_purp'] = 'business'

    df.loc[(df['tour_purpose'] == 'atwork_maint') & (df['dest_purpose'] == 'atwork'), 'new_dest_purp'] = 'othmaint'
    df.loc[(df['tour_purpose'] == 'atwork_maint') & (df['orig_purpose'] == 'atwork'), 'new_orig_purp'] = 'othmaint'
    
    # adding new link21 trip purpose
    df['link21_tour_purp'] = df['tour_purpose'].map(link21_purp_mapping)
    df['link21_orig_purp'] = df['new_orig_purp'].map(link21_purp_mapping)
    df['link21_dest_purp'] = df['new_dest_purp'].map(link21_purp_mapping)

    df['link21_trip_purp'] = df['link21_dest_purp']
    
    # for last trip on tour
    df1 = df.loc[(df['link21_dest_purp'] == 'home')]
    conditions = [
        df1['link21_tour_purp'].eq('work'),
        df1['link21_tour_purp'].eq('school'),
        ~df1['link21_tour_purp'].isin(['work','school'])
    ]

    choices = ['work', 'school', df1['link21_orig_purp']]
    df1['link21_trip_purp'] = np.select(conditions, choices, default=0)
    df2 = df.loc[(df['link21_dest_purp'] != 'home')]
    df2['link21_trip_purp'] = df2['link21_dest_purp']
    df = pd.concat([df1, df2], ignore_index=True)
    
    df1 = df.loc[df['dest_purpose'] == 'atwork']
    conditions = [
        df1['link21_tour_purp'].eq('business'),
        ~df1['link21_tour_purp'].eq('business')
    ]
    choices = ['business', df1['link21_orig_purp']]
    df1['link21_trip_purp'] = np.select(conditions, choices, default=0)
    
    df2 = df.loc[(df['dest_purpose'] != 'atwork')]
    df = pd.concat([df1, df2], ignore_index=True)
    
    df['trips'] = 1
    
    return df

In [7]:
df_trips = create_trip_roster(ctramp_dir, transbay_od, geo_cwks, link21_purp_mapping)
df_trn = df_trips.loc[df_trips['trip_mode'].isin([6,7,8])]

df_trn['Period'] = df_trn['depart_hour'].map(time_period_mapping)
df_trn['Mode'] = df_trn['trip_mode'].map(mode_cat_mapping)

NAs in PP Share: 0


In [7]:
df_trn

Unnamed: 0,hh_id,tour_id,stop_id,inbound,tour_purpose,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,...,pp_share,new_dest_purp,new_orig_purp,link21_tour_purp,link21_orig_purp,link21_dest_purp,link21_trip_purp,trips,Period,Mode
22,1494485,11,1,0,atwork_business,eatout,atwork,346,14,12,...,0.0,business,eatout,business,social,business,business,1,md,WALK_TRANSIT
37,1222372,11,-1,0,atwork_eat,Work,atwork,173,73,16,...,0.0,eatout,Work,social,work,social,work,1,pm,WALK_TRANSIT
43,1221407,11,-1,0,atwork_eat,Work,atwork,23,452,14,...,0.0,eatout,Work,social,work,social,work,1,md,WALK_TRANSIT
75,1221572,11,-1,0,atwork_eat,Work,atwork,23,14,10,...,0.0,eatout,Work,social,work,social,work,1,md,WALK_TRANSIT
77,1222015,11,-1,0,atwork_eat,Work,atwork,68,82,11,...,0.0,eatout,Work,social,work,social,work,1,md,WALK_TRANSIT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3833834,913047,0,-1,0,shopping,Home,shopping,2862,2844,11,...,100.0,shopping,Home,shopping,home,shopping,shopping,1,md,WALK_TRANSIT
3839316,2731484,0,1,0,shopping,social,shopping,3149,3167,10,...,100.0,shopping,social,shopping,social,shopping,shopping,1,md,WALK_TRANSIT
3839378,2735376,0,-1,0,shopping,Home,shopping,3145,3215,9,...,100.0,shopping,Home,shopping,home,shopping,shopping,1,am,WALK_TRANSIT
3839748,2753414,0,-1,0,shopping,Home,shopping,3157,3167,9,...,100.0,shopping,Home,shopping,home,shopping,shopping,1,am,WALK_TRANSIT


In [7]:
#per = 'AM'
#acc = 'WLK_TRN_PNR'

for period in time_periods:
    
    for acc_egg in acc_egg_modes:
        
        file_name = _join(params['best_path_skim_dir'], period.lower() +'_'+ acc_egg +'_v9_1_release11302022_bestpathresults.omx')
    
        if os.path.exists(file_name):
            
            skim = omx.open_file(file_name)

            trip_time = np.array(skim['IVT']) + np.array(skim['DTIME']) + np.array(skim['WACC']) + \
                        np.array(skim['WAIT']) + np.array(skim['WAUX']) + np.array(skim['WEGR'])

            ttime = array2df(trip_time, cols = ['orig', 'dest', 'trip_time'])

            ivt = skim_core_to_df(skim, 'IVT', cols =['orig', 'dest', 'ivt'])
            wacc = skim_core_to_df(skim, 'WACC', cols =['orig', 'dest', 'wacc'])
            wait = skim_core_to_df(skim, 'WAIT', cols =['orig', 'dest', 'wait'])
            wegr = skim_core_to_df(skim, 'WEGR', cols =['orig', 'dest', 'wegr'])
            dtime = skim_core_to_df(skim, 'DTIME', cols =['orig', 'dest', 'dtime'])
            xwait = skim_core_to_df(skim, 'XWAIT', cols =['orig', 'dest', 'xwait'])

            df_trn_skim = pd.merge(ivt, wacc, on = ['orig', 'dest'], how='left').merge(
                                   wait, on=['orig', 'dest'], how = 'left').merge(
                                   wegr, on=['orig', 'dest'], how = 'left').merge(
                                   dtime, on=['orig', 'dest'], how = 'left').merge(
                                   xwait, on=['orig', 'dest'], how = 'left').merge(
                                   ttime, on=['orig', 'dest'], how = 'left')

            skim.close()

            df_trn_skim.to_parquet(_join(params['best_path_skim_dir'], period.lower() +'_'+ acc_egg +'_cores.parquet'))

        else:
            print(f"file doesn't exist for time period: {period} and mode : {acc_egg}")

file doesn't exist for time period: md and mode : WLK_TRN_WLK
file doesn't exist for time period: md and mode : KNR_TRN_WLK
file doesn't exist for time period: md and mode : PNR_TRN_WLK
file doesn't exist for time period: md and mode : WLK_TRN_PNR
file doesn't exist for time period: md and mode : WLK_TRN_KNR
file doesn't exist for time period: pm and mode : WLK_TRN_WLK
file doesn't exist for time period: pm and mode : KNR_TRN_WLK
file doesn't exist for time period: pm and mode : PNR_TRN_WLK
file doesn't exist for time period: pm and mode : WLK_TRN_PNR
file doesn't exist for time period: pm and mode : WLK_TRN_KNR
file doesn't exist for time period: ev and mode : WLK_TRN_WLK
file doesn't exist for time period: ev and mode : KNR_TRN_WLK
file doesn't exist for time period: ev and mode : PNR_TRN_WLK
file doesn't exist for time period: ev and mode : WLK_TRN_PNR
file doesn't exist for time period: ev and mode : WLK_TRN_KNR
file doesn't exist for time period: ea and mode : WLK_TRN_WLK
file doe

In [8]:
#df_trn

In [8]:
time_periods = ['AM']
df_temp = []

for period in time_periods:
    print(f'processing - {period}')
    
    df_trn_pd = df_trn[df_trn['Period'] == period.lower()]
    df_trn_wlk = df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT']
    df_trn_pnr = df_trn_pd[df_trn_pd['Mode'] == 'PNR_TRANSIT']
    df_trn_knr = df_trn_pd[df_trn_pd['Mode'] == 'KNR_TRANSIT']
    
    #for acc_egg in acc_egg_modes:
    #for Walk transit Walk
    df_skm_wk = pd.read_parquet(_join(params['best_path_skim_dir'], period.lower() +'_WLK_TRN_WLK' + '_cores.parquet'))
    
    
    df_skm_wk = df_skm_wk.loc[df_skm_wk['ivt']>0]


    df_wlk = pd.merge(df_trn_wlk, df_skm_wk, 
                          left_on =['orig_taz', 'dest_taz'], 
                          right_on=['orig', 'dest'], 
                          how ='inner')

    # PNR Transit
    df_skm_pnr_ib = pd.read_parquet(_join(params['best_path_skim_dir'], period.lower() +'_WLK_TRN_PNR' + '_cores.parquet'))
    df_skm_pnr_ib = df_skm_pnr_ib[df_skm_pnr_ib['ivt']>0]

    df_skm_pnr_ob = pd.read_parquet(_join(params['best_path_skim_dir'], period.lower() +'_PNR_TRN_WLK' + '_cores.parquet')) 
    df_skm_pnr_ob = df_skm_pnr_ob[df_skm_pnr_ob['ivt']>0]

    df_trn_pnr_ib = df_trn_pnr[df_trn_pnr['inbound'] == 1] # returning home
    df_trn_pnr_ib = pd.merge(df_trn_pnr_ib, df_skm_pnr_ib, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_pnr_ob = df_trn_pnr[df_trn_pnr['inbound'] != 1] # returning home
    df_trn_pnr_ob = pd.merge(df_trn_pnr_ob, df_skm_pnr_ob, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_pnr = pd.concat([df_trn_pnr_ib, df_trn_pnr_ob], ignore_index=True)

        # KNR Transit
    df_skm_knr_ib = pd.read_parquet(_join(params['best_path_skim_dir'], period.lower() +'_WLK_TRN_KNR' + '_cores.parquet'))
    df_skm_knr_ib = df_skm_pnr_ib[df_skm_pnr_ib['ivt']>0]

    df_skm_knr_ob = pd.read_parquet(_join(params['best_path_skim_dir'], period.lower() +'_KNR_TRN_WLK' + '_cores.parquet')) 
    df_skm_knr_ob = df_skm_knr_ob[df_skm_knr_ob['ivt']>0]

    df_trn_knr_ib = df_trn_knr[df_trn_knr['inbound'] == 1] # returning home
    df_trn_knr_ib = pd.merge(df_trn_knr_ib, df_skm_knr_ib, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_knr_ob = df_trn_knr[df_trn_knr['inbound'] != 1] # returning home
    df_trn_knr_ob = pd.merge(df_trn_knr_ob, df_skm_knr_ob, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_knr = pd.concat([df_trn_knr_ib, df_trn_knr_ob], ignore_index=True)

    df_trn_rail = pd.concat([df_wlk, df_pnr, df_knr], ignore_index=True)
    df_temp.append(df_trn_rail)
    

df_trn_rail = pd.concat(df_temp)

processing - AM


In [9]:
df_trn_rail

Unnamed: 0,hh_id,tour_id,stop_id,inbound,tour_purpose,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,...,Mode,orig,dest,ivt,wacc,wait,wegr,dtime,xwait,trip_time
0,1282915,11,-1,0,atwork_eat,Work,atwork,2302,607,7,...,WALK_TRANSIT,2302,607,3089.0,437.0,988.0,1146.0,0.0,126.0,5678.0
1,1308016,11,-1,0,atwork_eat,Work,atwork,63,81,9,...,WALK_TRANSIT,63,81,450.0,346.0,60.0,2692.0,0.0,0.0,3548.0
2,1355395,11,-1,0,atwork_maint,Work,atwork,304,394,7,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0
3,1333012,0,-1,0,work_high,Home,work,304,394,6,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0
4,1332843,0,-1,0,work_very high,Home,work,304,394,7,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22702,1099326,0,-1,0,work_med,Home,work,3330,2226,6,...,KNR_TRANSIT,3330,2226,3109.0,0.0,308.0,727.0,4575.0,0.0,8719.0
22703,1100230,0,-1,0,work_very high,Home,work,3331,60,6,...,KNR_TRANSIT,3331,60,186.0,0.0,246.0,325.0,6947.0,0.0,7704.0
22704,1657471,0,-1,0,eatout,Home,eatout,739,10,9,...,KNR_TRANSIT,739,10,3538.0,0.0,273.0,1542.0,4179.0,0.0,9532.0
22705,305045,0,-1,0,eatout,Home,eatout,2135,68,9,...,KNR_TRANSIT,2135,68,3020.0,0.0,308.0,1871.0,1489.0,0.0,6688.0


In [12]:
df_trn_rail.columns

Index(['hh_id', 'tour_id', 'stop_id', 'inbound', 'tour_purpose',
       'orig_purpose', 'dest_purpose', 'orig_taz', 'dest_taz', 'depart_hour',
       'trip_mode', 'tour_mode', 'tour_category', 'tours', 'transbay_od',
       'orig_rdm_zones', 'orig_super_dist', 'orig_county', 'dest_rdm_zones',
       'dest_super_dist', 'dest_county', 'home_zone', 'pp_share',
       'new_dest_purp', 'new_orig_purp', 'link21_tour_purp',
       'link21_orig_purp', 'link21_dest_purp', 'link21_trip_purp', 'trips',
       'Period', 'Mode', 'orig', 'dest', 'ivt', 'wacc', 'wait', 'wegr',
       'dtime', 'xwait', 'trip_time'],
      dtype='object')

In [59]:
# summarise for prioirty population
summary_cols = params['description_a1.2']

time_periods = ['AM']


pp_df = []
reg_df = []
county_df = []
sd_df = []
tb_df = []
rdm_df = []


for columns in summary_cols:
    
    for period in time_periods:
    
        df_temp = df_trn_rail[(df_trn_rail[columns] > 0) & (df_trn_rail['Period'] == period.lower())]

        #prioirty population
        df_temp['pp_share'] = df_temp['pp_share']/100
        pp_value = weighted_average(df_temp, 'ivt', 'pp_share')
        pp_df_temp = pd.DataFrame({'Population': 'Prioirty Population',
                              'Period': period,
                              'Value': pp_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.2',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        pp_df.append(pp_df_temp)
        
        #regional value
        region_value = df_temp[columns].mean()
        reg_df_temp = pd.DataFrame({'Population': 'Whole Population',
                              'Period': period,
                              'Value': region_value,
                              'Orig_zone' : '',
                              'Dest_zone' : '',
                              'Zone_ID' : 'Megaregion',
                              'Geography' : 'Regional',
                              'Metric':summary_cols[columns][1],
                              'Submetric':summary_cols[columns][1]+'.1',
                              'Description' : summary_cols[columns][0],
                              'Units' : summary_cols[columns][2],
                              'Metric_name' : summary_cols[columns][3]}, index=[0])
        reg_df.append(reg_df_temp)
        
        #county 
        county_df_temp = df_temp.groupby(['orig_county', 'dest_county'])[columns].mean().reset_index()
        county_df_temp = rename_columns(county_df_temp, ['orig_county', 'dest_county', columns])
        
        county_df_temp['Period'] = period
        county_df_temp['Population'] = 'Whole Population'
        county_df_temp['Zone_Id'] = ''
        county_df_temp['Geography'] = 'County'
        county_df_temp['Description'] = summary_cols[columns][0]
        county_df_temp['Metric'] = summary_cols[columns][1]
        county_df_temp['Submetric'] = summary_cols[columns][1]+'.5'
        county_df_temp['Units'] = summary_cols[columns][2]
        county_df_temp['Metric_name'] = summary_cols[columns][3]
        county_df.append(county_df_temp)
        
        #super district
        sd_df_temp = df_temp.groupby(['orig_super_dist', 'dest_super_dist'])[columns].mean().reset_index()
        sd_df_temp = rename_columns(sd_df_temp, ['orig_super_dist', 'dest_super_dist', columns])
        
        sd_df_temp['Period'] = period
        sd_df_temp['Population'] = 'Whole Population'
        sd_df_temp['Zone_ID'] = ''
        sd_df_temp['Geography'] = 'Superdistrict'
        sd_df_temp['Description'] = summary_cols[columns][0]
        sd_df_temp['Metric'] = summary_cols[columns][1]
        sd_df_temp['Submetric'] = summary_cols[columns][1]+'.4'
        sd_df_temp['Units'] = summary_cols[columns][2]
        sd_df_temp['Metric_name'] = summary_cols[columns][3]
        sd_df.append(sd_df_temp)
        
        #RDM Zones
        rdm_df_temp = df_temp.groupby(['orig_rdm_zones', 'dest_rdm_zones'])[columns].mean().reset_index()
        rdm_df_temp =  rename_columns(rdm_df_temp, ['orig_rdm_zones', 'dest_rdm_zones', columns])
        
        rdm_df_temp['Period'] = period
        rdm_df_temp['Population'] = 'Whole Population'
        rdm_df_temp['Zone_ID'] = ''
        rdm_df_temp['Geography'] = 'RDM'
        rdm_df_temp['Description'] = summary_cols[columns][0]     
        rdm_df_temp['Metric'] = summary_cols[columns][1]
        rdm_df_temp['Submetric'] = summary_cols[columns][1]+'.3'
        rdm_df_temp['Units'] = summary_cols[columns][2]
        rdm_df_temp['Metric_name'] = summary_cols[columns][3]
        rdm_df.append(rdm_df_temp)
        
        
        # transbay region
        tb_value = df_temp[df_temp['transbay_od']==1]
        tb_value = tb_value[columns].mean()
        
        tb_df_temp = pd.DataFrame({'Population': 'Whole Population',
                                   'Period': period,
                                   'Value': tb_value,
                                   'Orig_zone' : '',
                                   'Dest_zone' : '',
                                   'Zone_ID' : 'Megaregion',
                                   'Geography' : 'Transbay',
                                   'Metric':summary_cols[columns][1],
                                   'Submetric':summary_cols[columns][1]+'.6',
                                   'Description' : summary_cols[columns][0],
                                   'Units' : summary_cols[columns][2],
                                   'Metric_name' : summary_cols[columns][3]}, index=[0])
                                    
        tb_df.append(tb_df_temp)                     
    
    
pp_df = pd.concat(pp_df)
reg_df = pd.concat(reg_df)
county_df = pd.concat(county_df)
sd_df = pd.concat(sd_df)
rdm_df = pd.concat(rdm_df)
tb_df = pd.concat(tb_df)

In [60]:
pp_df

Unnamed: 0,Population,Period,Value,Orig_zone,Dest_zone,Zone_ID,Geography,Metric,Submetric,Description,Units,Metric_name
0,Prioirty Population,AM,3321.278732,,,Megaregion,Regional,A1.2,A1.2.2,in-vehicle travel time,seconds,actual in vehicle travel time
0,Prioirty Population,AM,3321.278732,,,Megaregion,Regional,A1.3,A1.3.2,wait time,seconds,actual wait time
0,Prioirty Population,AM,3079.036758,,,Megaregion,Regional,A1.4,A1.4.2,walk access time,seconds,actual walk access time
0,Prioirty Population,AM,3308.453798,,,Megaregion,Regional,A1.4,A1.4.2,walk egress time,seconds,actual walk egress time
0,Prioirty Population,AM,3646.306682,,,Megaregion,Regional,A1.5,A1.5.2,drive time,seconds,actual drive time
0,Prioirty Population,AM,4298.451438,,,Megaregion,Regional,A1.6,A1.6.2,transfer time,seconds,actual transfer time
0,Prioirty Population,AM,3321.278732,,,Megaregion,Regional,A1.7,A1.7.2,total trip time,seconds,actual total trip time


In [42]:
# summary by geography

#priority population
#df_temp = pd.merge(df_temp, pp_perc, left_on=['home_zone'], right_on=['taz'], how ='left')
#df_temp['pp_share'] = df_temp['pp_share']/100

In [41]:
df_temp.columns

Index(['hh_id', 'tour_id', 'stop_id', 'inbound', 'tour_purpose',
       'orig_purpose', 'dest_purpose', 'orig_taz', 'dest_taz', 'depart_hour',
       'trip_mode', 'tour_mode', 'tour_category', 'tours', 'transbay_od',
       'orig_rdm_zones', 'orig_super_dist', 'orig_county', 'dest_rdm_zones',
       'dest_super_dist', 'dest_county', 'home_zone', 'pp_share_x',
       'new_dest_purp', 'new_orig_purp', 'link21_tour_purp',
       'link21_orig_purp', 'link21_dest_purp', 'link21_trip_purp', 'trips',
       'Period', 'Mode', 'orig', 'dest', 'ivt', 'wacc', 'wait', 'wegr',
       'dtime', 'xwait', 'trip_time', 'taz', 'pp_share_y'],
      dtype='object')

## B2.1

In [10]:
df_trn_rail[0:5]

Unnamed: 0,hh_id,tour_id,stop_id,inbound,tour_purpose,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,...,Mode,orig,dest,ivt,wacc,wait,wegr,dtime,xwait,trip_time
0,1282915,11,-1,0,atwork_eat,Work,atwork,2302,607,7,...,WALK_TRANSIT,2302,607,3089.0,437.0,988.0,1146.0,0.0,126.0,5678.0
1,1308016,11,-1,0,atwork_eat,Work,atwork,63,81,9,...,WALK_TRANSIT,63,81,450.0,346.0,60.0,2692.0,0.0,0.0,3548.0
2,1355395,11,-1,0,atwork_maint,Work,atwork,304,394,7,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0
3,1333012,0,-1,0,work_high,Home,work,304,394,6,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0
4,1332843,0,-1,0,work_very high,Home,work,304,394,7,...,WALK_TRANSIT,304,394,1411.0,1657.0,451.0,161.0,0.0,246.0,3731.0


In [None]:
df_trn_rail