In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Show frames in full when displayed: no truncation of rows or columns.
for _display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(_display_option, None)

In [3]:
# Short aliases for the os.path helpers used to build file locations.
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# Read the run configuration; every setting below comes from this file.
with open('config.yaml', 'r') as config_file:
    params = yaml.safe_load(config_file)

# --- scenario identifiers -------------------------------------------------
concept_id = params['concept_id']
iteration = params['iteration']
filename_extension = params['filename_extension']

# --- directories ----------------------------------------------------------
model_outputs_dir = params['model_dir']
ctramp_dir = params['ctramp_dir']
summary_outputs = params['summary_dir']   # same setting as `summary_dir`; both names are kept
summary_dir = params['summary_dir']
best_path_skim_dir = params['best_path_skim_dir']

skims_dir = _join(model_outputs_dir, "skims")
transit_skims_dir = _join(skims_dir, "transit")
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# --- lookups and category lists --------------------------------------------
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

# --- output layout and annualisation factors -------------------------------
perf_measure_columns = params['final_columns']
annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']

In [4]:
# Trip roster that later cells filter and merge with the crowding skims.
# The read of `df_trips` used to be commented out, so the notebook only worked
# when `df_trips` was left over in the kernel; on a fresh kernel the next
# cell raised NameError.
# NOTE(review): two roster files were listed in the commented-out code and it
# is not visible from this notebook which one is intended. Confirm the choice
# below before publishing results.
trip_roster_file = 'trip_roster.parquet'
# trip_roster_file = 'trip_roster_baseline_for_A1.10_crowding.parquet'
df_trips = pd.read_parquet(_join(preprocess_dir, trip_roster_file))

# Baseline crowding skim (one row per orig/dest/period/mode) used to restrict
# this scenario to OD pairs that are also present in the baseline.
df_base_cwd_skim = pd.read_parquet(_join(preprocess_dir, 'A1.10_baseline_connected_cwd_skim.parquet'))

In [5]:
# Display the resolved transit skims directory as a check on the configured model run.
transit_skims_dir

'C:\\MTC_tmpy\\TM2_2050Baseline_R2_Run4\\tm2py\\examples\\Link21_3332\\skims\\transit'

In [6]:
# Keep an independent copy of the transit trips only.
# NOTE(review): trip_mode codes 6, 7 and 8 presumably correspond to the
# WALK_/PNR_/KNR_TRANSIT values of `Mode` used later — confirm.
is_transit_trip = df_trips['trip_mode'].isin([6, 7, 8])
df_trn = df_trips.loc[is_transit_trip].copy()

In [7]:
# Run the `utility` helper over the configured periods and access/egress modes.
# NOTE(review): presumably this produces the rail_od_v9_trim_*.omx and
# rail_crowding_od_v9_trim_*.omx files that later cells read from
# `preprocess_dir` — confirm in the utility module.
create_rail_crowding_od_pairs(preprocess_dir, transit_skims_dir, time_periods, acc_egg_modes)

Period:  am
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  md
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  pm
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ev
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ea
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR


In [8]:
# One entry per access/egress combination:
#   (progress label, trip-roster `Mode`, skim core, inbound filter)
# inbound filter: None = all trips of that Mode, True = inbound == 1,
# False = inbound != 1.
# NOTE(review): the pairing of inbound == 1 with PNR_TRN_WLK / KNR_TRN_WLK is
# kept exactly as this cell had it (it was commented "Going to work"). The
# legacy loop further down the notebook pairs inbound == 1 with WLK_TRN_PNR /
# WLK_TRN_KNR instead. The two disagree; confirm which direction is right.
RAIL_ACCESS_SEGMENTS = [
    ('Walk, transit, walk.',        'WALK_TRANSIT', 'WLK_TRN_WLK', None),
    ('Park-n-ride, transit, walk.', 'PNR_TRANSIT',  'PNR_TRN_WLK', True),
    ('Walk, transit, Park-n-ride.', 'PNR_TRANSIT',  'WLK_TRN_PNR', False),
    ('Kiss-n-ride, transit, walk.', 'KNR_TRANSIT',  'KNR_TRN_WLK', True),
    ('Walk, transit, Kiss-n-ride.', 'KNR_TRANSIT',  'WLK_TRN_KNR', False),
]


def _connected_rail_segment(rail_od_file, crowd_file, baseline_period_skim, period_trips,
                            period, skim_core, trip_mode, inbound):
    """Build the connected crowding skim and the matching trips for one skim core.

    Parameters
    ----------
    rail_od_file : open OMX file holding the rail-connectivity cores (`rail_od`).
    crowd_file : open OMX file holding the crowding-time cores (`crowd`).
    baseline_period_skim : baseline crowding skim rows of this period; must
        carry `orig`, `dest` and `mode` columns.
    period_trips : transit trips of this period.
    period : period label written to the `period` column.
    skim_core : access/egress core name, also written to the `mode` column.
    trip_mode : value of the trips' `Mode` column to keep.
    inbound : None keeps all trips, True keeps `inbound == 1`, False keeps
        `inbound != 1`.

    Returns
    -------
    (connected_skim, segment_trips)
        connected_skim: orig/dest/crowd/rail_od/period/mode for ODs whose
        `rail_od` is positive (before the baseline restriction).
        segment_trips: the selected trips inner-joined to the crowding values
        of ODs that are also present in the baseline skim for this core.
    """
    rail_od = skim_core_to_df(rail_od_file, skim_core)
    rail_od = rail_od[rail_od['rail_od'] > 0]

    connected_skim = skim_core_to_df(crowd_file, skim_core, cols=['orig', 'dest', 'crowd'])
    connected_skim = connected_skim.merge(rail_od, on=['orig', 'dest'], how='inner')
    connected_skim['period'] = period
    connected_skim['mode'] = skim_core

    # Use the baseline purely as an OD filter. Merging the whole baseline frame
    # on ['orig', 'dest'] would also bring in its other columns (period, mode,
    # ...) and suffix the overlapping names.
    baseline_ods = baseline_period_skim.loc[baseline_period_skim['mode'] == skim_core,
                                            ['orig', 'dest']]
    skim_in_baseline = connected_skim.merge(baseline_ods, on=['orig', 'dest'], how='inner')

    segment_trips = period_trips[period_trips['Mode'] == trip_mode]
    if inbound is not None:
        is_inbound = segment_trips['inbound'] == 1
        segment_trips = segment_trips[is_inbound if inbound else ~is_inbound]
    segment_trips = segment_trips.merge(skim_in_baseline,
                                        left_on=['orig_taz', 'dest_taz'],
                                        right_on=['orig', 'dest'],
                                        how='inner')
    return connected_skim, segment_trips


connected_skims_by_period = []
rail_trips_by_period = []

for period in time_periods:
    print(f'Processing - {period.upper()}')

    # Baseline skim rows and transit trips of this period.
    df_od_cwd_base = df_base_cwd_skim.loc[df_base_cwd_skim['period'] == period]
    df_trn_pd = df_trn[df_trn['Period'] == period]

    # rail_od_*: which ODs have either nonzero IVTHVY or IVTCOM (0s and 1s).
    # rail_crowding_od_*: crowding times between ODs (already multiplied by
    # 1.62); disconnected ODs have a 0 value.
    rail_od_path = _join(preprocess_dir, "rail_od_v9_trim_" + period.upper() + ".omx")
    crowd_path = _join(preprocess_dir, "rail_crowding_od_v9_trim_" + period.upper() + ".omx")

    segment_skims = []
    segment_trips = []
    # The context managers close both OMX files even if a segment fails.
    with omx.open_file(rail_od_path) as df_od_pr, omx.open_file(crowd_path) as df_od_cwd:
        for label, trip_mode, skim_core, inbound in RAIL_ACCESS_SEGMENTS:
            print(label)
            skim_part, trips_part = _connected_rail_segment(
                df_od_pr, df_od_cwd, df_od_cwd_base, df_trn_pd,
                period, skim_core, trip_mode, inbound)
            segment_skims.append(skim_part)
            segment_trips.append(trips_part)

    print('Concatinating crowding skims for the connected ODs of the period.')
    df_period_connected_cwd_skim = pd.concat(segment_skims, ignore_index=True)
    connected_skims_by_period.append(df_period_connected_cwd_skim.drop(columns=['rail_od']))

    print('Concatinating the three rail trip dataframes of the period.')
    rail_trips_by_period.append(pd.concat(segment_trips, ignore_index=True))

df_connected_cwd_skim = pd.concat(connected_skims_by_period)
df_trn_rail = pd.concat(rail_trips_by_period)

Processing - AM
Walk, transit, walk.
Park-n-ride, transit, walk.
Walk, transit, Park-n-ride.
Kiss-n-ride, transit, walk.
Walk, transit, Kiss-n-ride.
Concatinating crowding skims for the connected ODs of the period.
Concatinating the three rail trip dataframes of the period.
Processing - MD
Walk, transit, walk.
Park-n-ride, transit, walk.
Walk, transit, Park-n-ride.
Kiss-n-ride, transit, walk.
Walk, transit, Kiss-n-ride.
Concatinating crowding skims for the connected ODs of the period.
Concatinating the three rail trip dataframes of the period.
Processing - PM
Walk, transit, walk.
Park-n-ride, transit, walk.
Walk, transit, Park-n-ride.
Kiss-n-ride, transit, walk.
Walk, transit, Kiss-n-ride.
Concatinating crowding skims for the connected ODs of the period.
Concatinating the three rail trip dataframes of the period.
Processing - EV
Walk, transit, walk.
Park-n-ride, transit, walk.
Walk, transit, Park-n-ride.
Kiss-n-ride, transit, walk.
Walk, transit, Kiss-n-ride.
Concatinating crowding ski

In [9]:
# Preview the first rows of the stacked connected-OD crowding skim.
df_connected_cwd_skim.head(3)

Unnamed: 0,orig,dest,crowd,period,mode
0,267,1,130.655365,am,WLK_TRN_WLK
1,268,1,130.655365,am,WLK_TRN_WLK
2,269,1,130.655365,am,WLK_TRN_WLK


In [22]:
# The stacked skim holds one slice per time-of-day period and access/egress
# mode: five TODs and five modes.
n_periods, n_modes = 5, 5
avg_connected_ods = int(round(len(df_connected_cwd_skim) / (n_periods * n_modes), 0))
print(f'Total connected ODs, averaged over all periods and modes: {avg_connected_ods:,}')

Total connected ODs, averaged over all periods and modes: 8,291,456


In [21]:
# Keep only the OD/period/mode combinations that are also present in the
# baseline skim. This uses the full baseline frame: `df_od_cwd_base` is a
# leftover loop variable holding only the last processed period, so merging on
# it silently dropped every other period. Only the key columns are merged so
# that `crowd` keeps its name and re-running the cell gives the same result.
# NOTE(review): assumes baseline keys are unique per orig/dest/period/mode.
baseline_keys = df_base_cwd_skim[['orig', 'dest', 'period', 'mode']]
df_connected_cwd_skim = df_connected_cwd_skim.merge(
    baseline_keys, on=['orig', 'dest', 'period', 'mode'], how='inner')
print(f"{int(round(len(df_connected_cwd_skim) / (5 * 5), 0)):,}")

# Note: There are ODs connected by rail with zero crowding time because there aren't many passengers between those ODs.
# NOTE(review): the /100 presumably converts `crowd` from hundredths of minutes to minutes — confirm.
crowd_stats = df_connected_cwd_skim.crowd.describe()
print(f"Average crowding time, including all ODs, periods, and modes, and not weighted by any trip: {crowd_stats['mean']/100:.3f}")
crowd_stats

Average crowding time, including all ODs, periods, and modes, and not weighted by any trip: 4.074


count    2.072864e+08
mean     4.074413e+02
std      7.011472e+02
min      0.000000e+00
25%      1.607397e+01
50%      8.575587e+01
75%      5.151603e+02
max      9.038510e+03
Name: crowd, dtype: float64

In [11]:
# Preview the first rows of the rail trips joined to their OD crowding values.
df_trn_rail.head(3)

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,trip_type,trips,transbay_od,orig_rdm_zones,orig_super_dist,orig_county,dest_rdm_zones,dest_super_dist,dest_county,home_zone,income,Income,pp_share,link21_trip_purp,Period,Mode,orig,dest,crowd,rail_od,period,mode
0,1789254,4338265.0,0,Work,atwork,2216,2291,7,6,1.0,INM,1.0,0.0,Alameda_22,18,4,Alameda_03,19,4,9,256047,100k+,100.0,work,am,WALK_TRANSIT,2216,2291,114.13353,1.0,am,WLK_TRN_WLK
1,592815,1342412.0,0,escort,work,2216,2291,6,6,1.0,INM,1.0,0.0,Alameda_22,18,4,Alameda_03,19,4,1869,121941,100k+,100.0,work,am,WALK_TRANSIT,2216,2291,114.13353,1.0,am,WLK_TRN_WLK
2,334043,646224.0,0,work,work,2216,2291,8,6,1.0,INM,1.0,0.0,Alameda_22,18,4,Alameda_03,19,4,1992,33405,30k_to_60k,100.0,work,am,WALK_TRANSIT,2216,2291,114.13353,1.0,am,WLK_TRN_WLK


In [23]:
# To get total transit trips of a concept scenario, use the baseline script! Do not use this cell.
# x = df_trn_rail.trips.sum()
# print(f'Total number of transit trips: {x:,}')

Total number of transit trips: 1,226,291.0


In [32]:
# Trip-weighted crowding exposure columns, added to `df_trn_rail` for the
# summaries in this and the following cells.
# NOTE(review): the /100 factors presumably convert `crowd` from hundredths of
# minutes to minutes and `pp_share` from percent to a fraction — confirm.
df_trn_rail['crowd_trips'] = df_trn_rail['trips'] * df_trn_rail['crowd'] / 100
df_trn_rail['pp_trips'] = df_trn_rail['trips'] * df_trn_rail['pp_share'] / 100
df_trn_rail['crowd_pp_trips'] = df_trn_rail['pp_trips'] * df_trn_rail['crowd'] / 100


def _crowding_total_metric(groupby_columns, summary_column, submetric,
                           description, population, geography):
    """Summarise one crowding column of `df_trn_rail` into a performance-measure frame.

    The summary comes from `summarize_all_combinations`, whose result is
    expected to carry the group-by columns plus a `Value` column. `Value` is
    reported as a total, not a trips-weighted mean (the BC team asked for the
    total value instead of the mean).

    Parameters
    ----------
    groupby_columns : columns of `df_trn_rail` to summarise over. RDM zone
        columns are renamed to `Origin_zone` / `Dest_zone`; when they are not
        part of the grouping both zone columns are filled with ''.
    summary_column : column of `df_trn_rail` that is summarised.
    submetric, description, population, geography : label values written to
        the columns of the same name.

    Returns
    -------
    DataFrame restricted to, and ordered by, `perf_measure_columns`.
    """
    zone_renames = {'orig_rdm_zones': 'Origin_zone', 'dest_rdm_zones': 'Dest_zone'}
    summary = summarize_all_combinations(df_trn_rail, groupby_columns=groupby_columns,
                                         summary_column=summary_column)
    summary = summary.rename(columns=zone_renames)

    key_columns = [zone_renames.get(column, column) for column in groupby_columns]
    # .copy() makes the column assignments below act on an independent frame
    # rather than on a slice of `summary`.
    metric = summary[key_columns + ['Value']].copy()
    for zone_column in ('Origin_zone', 'Dest_zone'):
        if zone_column not in metric.columns:
            metric[zone_column] = ''

    metric = metric.assign(
        Concept_ID=concept_id,
        Metric_ID='A1.10',
        Metric_name='Crowding (Region)',
        Submetric=submetric,
        Description=description,
        Population=population,
        Geography=geography,
        Purpose='',
        Mode='',
        Zone_ID='',
        Units='minutes',
        Total_Increment='',
    )
    return metric[perf_measure_columns]


# NOTE(review): the 'Prioirty Population' spelling is kept unchanged in case
# downstream consumers match on the exact label — confirm before correcting.

# A1.10.1 / A1.10.2: regional totals, whole and priority population.
region_value = _crowding_total_metric(
    ['Period', 'Income'], 'crowd_trips',
    'A1.10.1', 'Regional crowding', 'Whole Population', 'Regional')
pp_region_value = _crowding_total_metric(
    ['Period', 'Income'], 'crowd_pp_trips',
    'A1.10.2', 'Regional crowding', 'Prioirty Population', 'Regional')

# A1.10.3 / A1.10.4: totals between RDM zones, whole and priority population.
df_rdm = _crowding_total_metric(
    ['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 'crowd_trips',
    'A1.10.3', 'Crowding between RDM zones', 'Whole Population', 'RDM')
df_rdm_pp = _crowding_total_metric(
    ['orig_rdm_zones', 'dest_rdm_zones', 'Period', 'Income'], 'crowd_pp_trips',
    'A1.10.4', 'Crowding between RDM zones', 'Prioirty Population', 'RDM')

In [33]:
# A1.10.5: regional crowding per period, computed as summarised crowding
# (`crowd_trips`) divided by summarised rail trips (`trips`).
crowd_by_period = summarize_all_combinations(
    df_trn_rail, groupby_columns=['Period'], summary_column='crowd_trips'
).rename(columns={'Value': 'crowd_trips'})

trips_by_period = summarize_all_combinations(
    df_trn_rail, groupby_columns=['Period'], summary_column='trips'
).rename(columns={'Value': 'trips'})

region_value2 = crowd_by_period.merge(trips_by_period, on=['Period'], how='left')
region_value2['Value'] = region_value2['crowd_trips'] / region_value2['trips']

# Attach the label columns, then order the frame by the standard output layout.
region_value2 = region_value2[['Period', 'Value']].assign(**{
    'Concept_ID': concept_id,
    'Metric_ID': 'A1.10',
    'Metric_name': 'Crowding (Region)',
    'Submetric': 'A1.10.5',
    'Description': 'Regional crowding',
    'Population': 'Whole Population',
    'Geography': 'Regional',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Mode': '',
    'Income': '',
    'Zone_ID': '',
    'Units': 'minutes',
    'Total_Increment': '',
})
region_value2 = region_value2[perf_measure_columns]

In [34]:
# Only the A1.10.5 frame is exported; region_value, pp_region_value, df_rdm
# and df_rdm_pp are currently left out.
all_dfs = [region_value2]
metric_name = '_regional_crowding_'

for metric_df in all_dfs:
    metric_df = metric_df.reset_index(drop=True)[perf_measure_columns]
    # The first row supplies the labels that go into the file name.
    file_name = metric_df.loc[0, 'Submetric']
    geography = '_' + metric_df.loc[0, 'Geography'].replace(' ', '_')
    csv_name = ''.join([file_name, metric_name, concept_id, geography, filename_extension, '.csv'])
    metric_df.to_csv(_join(summary_dir, csv_name), index=None)
    print(len(metric_df), file_name, metric_df.loc[0, 'Metric_name'])

# Combined file; at present it holds the same single frame as above.
# NOTE(review): the 'A10.1' prefix differs from the 'A1.10' metric id used
# elsewhere — confirm whether the file name is intentional.
combined_df = region_value2.reset_index(drop=True)
combined_csv_name = ''.join(['A10.1', '_regional_crowding_', concept_id, '_region', filename_extension, '.csv'])
combined_df.to_csv(_join(summary_dir, combined_csv_name), index=None)

6 A1.10.5 Crowding (Region)


def _mean_trips_metric(trips_df, group_columns, column_renames,
                       submetric, description, geography, blank_od_zones=False):
    """Average `trips` per group and attach the A1.9 label columns.

    NOTE(review): the value is the mean of the `trips` column although the
    frames are labelled as crowding in minutes — confirm this is intended.

    Parameters
    ----------
    trips_df : trips to aggregate.
    group_columns : columns to group by.
    column_renames : renames applied to the grouped result (may be empty).
    submetric, description, geography : label values for the matching columns.
    blank_od_zones : when True, add empty `Origin_zone` / `Dest_zone` columns.
    """
    frame = trips_df.groupby(group_columns)['trips'].mean().reset_index()
    if column_renames:
        frame = frame.rename(columns=column_renames)

    labels = {
        'Concept_ID': concept_id,
        'Metric_ID': 'A1.9',
        'Metric_name': 'Crowding (Region)',
        'Submetric': submetric,
        'Description': description,
        'Population': 'Whole Population',
        'Geography': geography,
        'Zone_ID': '',
    }
    if blank_od_zones:
        labels.update({'Origin_zone': '', 'Dest_zone': ''})
    labels.update({'Units': 'minutes', 'Total_Increment': ''})

    for column, value in labels.items():
        frame[column] = value
    return frame


# County to county.
county_df_temp = _mean_trips_metric(
    df_trn_rail, ['orig_county', 'dest_county', 'Period'],
    {'trips': 'Value', 'orig_county': 'Origin_zone', 'dest_county': 'Dest_zone'},
    'A1.9.2', 'Crowding level between origin and destination county', 'County')

# Super district to super district.
sd_df_temp = _mean_trips_metric(
    df_trn_rail, ['orig_super_dist', 'dest_super_dist', 'Period'],
    {'trips': 'Value', 'orig_super_dist': 'Origin_zone', 'dest_super_dist': 'Dest_zone'},
    'A1.9.3', 'Regional crowding level', 'Super district')

# RDM zone to RDM zone.
rdm_df_temp = _mean_trips_metric(
    df_trn_rail, ['orig_rdm_zones', 'dest_rdm_zones', 'Period'],
    {'trips': 'Value', 'orig_rdm_zones': 'Origin_zone', 'dest_rdm_zones': 'Dest_zone'},
    'A1.9.4', 'Regional crowding level', 'RDM')

# Transbay trips only; the averaged column keeps its `trips` name here.
tb_value = _mean_trips_metric(
    df_trn_rail[df_trn_rail['transbay_od'] == 1], ['Period'], {},
    'A1.9.5', 'regional crowding level', 'Transbay', blank_od_zones=True)

# NOTE(review): this loop repeats the per-period processing done earlier in the
# notebook, but without the baseline-skim restriction, and it rebinds
# `df_rail_trips` and `df_trn_rail`. It appears to be an older version —
# confirm whether it should still run.
# NOTE(review): here inbound == 1 is paired with the WLK_TRN_PNR / WLK_TRN_KNR
# cores, whereas the earlier loop pairs inbound == 1 with PNR_TRN_WLK /
# KNR_TRN_WLK. The two disagree; confirm which is correct.
# NOTE(review): the OMX files opened below are never closed.
df_rail_trips = []

for period in time_periods:
    print(f'processing - {period.upper()}')
    
    # Read in which ODs have either nonzero IVTHVY or IVTCOM. The matrix below contains 0s and 1s.
    df_od_pr = omx.open_file(_join(preprocess_dir, "rail_od_v9_trim_" + period.upper() + ".omx"))
    
    # Read in crowding times between ODs (already multiplied by 1.62). Disconnected ODs have a 0 value.
    df_od_cwd = omx.open_file(_join(preprocess_dir, "rail_crowding_od_v9_trim_" + period.upper() + ".omx"))
    
    # Read in transit trips of this period.
    df_trn_pd = df_trn[df_trn['Period'] == period]
    
    # Walk transit: only in this section is the crowding frame pre-merged with the rail-OD frame.
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_WLK')
    df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]        
    df_rail_cwd = skim_core_to_df(df_od_cwd, 'WLK_TRN_WLK', cols =['orig', 'dest', 'crowd'])
    df_rail_cwd = df_rail_cwd.merge(df_rail_od, on=['orig', 'dest'], how ='inner')
    
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT']
    # Two successive merges on the same right-hand keys: pandas suffixes the
    # repeated `orig` / `dest` (and here `rail_od`) columns with _x / _y.
    df_trn_wlk = pd.merge(df_trn_acc, df_rail_od, 
                          left_on =['orig_taz', 'dest_taz'], 
                          right_on=['orig', 'dest'], 
                          how ='inner')
    df_trn_wlk = pd.merge(df_trn_wlk, df_rail_cwd,
                          left_on =['orig_taz', 'dest_taz'], 
                          right_on=['orig', 'dest'], 
                          how ='inner')
    #print(df_trn_wlk.columns)
    
    # PNR Transit
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'PNR_TRANSIT']
    df_trn_acc_inbnd = df_trn_acc[df_trn_acc['inbound'] == 1] # trips flagged inbound == 1
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_PNR')
    df_rail_cwd = skim_core_to_df(df_od_cwd, 'WLK_TRN_PNR', cols =['orig', 'dest', 'crowd']) # crowding core: walk access, PNR egress
    df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]
    df_trn_pnr_inb = pd.merge(df_trn_acc_inbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    #del df_trn_pnr_inb['orig']
    #del df_trn_pnr_inb['dest']
    
    df_trn_pnr_inb = pd.merge(df_trn_pnr_inb, df_rail_cwd, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    
    #print(df_trn_pnr_inb.columns)
    
    df_trn_acc_outbnd = df_trn_acc[df_trn_acc['inbound'] != 1] # trips with inbound != 1
    df_rail_od = skim_core_to_df(df_od_pr, 'PNR_TRN_WLK')
    df_rail_cwd = skim_core_to_df(df_od_cwd, 'PNR_TRN_WLK', cols =['orig', 'dest', 'crowd']) # crowding core: PNR access, walk egress
    df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]
    df_trn_pnr_outbnd = pd.merge(df_trn_acc_outbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    
    #del 
    df_trn_pnr_outbnd = pd.merge(df_trn_pnr_outbnd, df_rail_cwd, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    
    #print(df_trn_pnr_outbnd.columns)

    df_trn_pnr = pd.concat([df_trn_pnr_inb, df_trn_pnr_outbnd], ignore_index=True)
    
    # KNR Transit
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'KNR_TRANSIT']
    df_trn_acc_inbnd = df_trn_acc[df_trn_acc['inbound'] == 1] # trips flagged inbound == 1
    df_rail_cwd = skim_core_to_df(df_od_cwd, 'WLK_TRN_KNR', cols =['orig', 'dest', 'crowd']) # crowding core: walk access, KNR egress
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_KNR')
    df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]
    df_trn_knr_inb = pd.merge(df_trn_acc_inbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    df_trn_knr_inb = pd.merge(df_trn_knr_inb, df_rail_cwd, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')


    df_trn_acc_outbnd = df_trn_acc[df_trn_acc['inbound'] != 1] # trips with inbound != 1
    df_rail_od = skim_core_to_df(df_od_pr, 'KNR_TRN_WLK')
    df_rail_cwd = skim_core_to_df(df_od_cwd, 'KNR_TRN_WLK', cols =['orig', 'dest', 'crowd']) # crowding core: KNR access, walk egress
    df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]
    df_trn_knr_outbnd = pd.merge(df_trn_acc_outbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')
    df_trn_knr_outbnd = pd.merge(df_trn_knr_outbnd, df_rail_cwd, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_knr = pd.concat([df_trn_knr_inb, df_trn_knr_outbnd], ignore_index=True)
    
    # Stack the walk, PNR and KNR trips of this period and collect them.
    df_trn_rail = pd.concat([df_trn_wlk, df_trn_pnr, df_trn_knr], ignore_index=True)
    df_rail_trips.append(df_trn_rail)

# Stack all periods; the per-period indices are kept (no ignore_index).
df_trn_rail = pd.concat(df_rail_trips)
print(df_trn_rail.columns)

In [40]:
# Join trips of every mode to the WLK_TRN_WLK crowding skim, period by period.
# NOTE(review): `df_trips` is used unfiltered here, so non-transit trips also
# receive a rail crowding value — confirm this is intended for the
# "all trips" variant.
# NOTE(review): this cell rebinds `df_connected_cwd_skim` to a walk-only skim.
connected_skims_by_period = []
all_trips_by_period = []

for period in time_periods:
    print(f'Processing - {period.upper()}')

    # Baseline skim rows and trips of this period.
    df_od_cwd_base = df_base_cwd_skim.loc[df_base_cwd_skim['period'] == period]
    df_trips_pd = df_trips[df_trips['Period'] == period]

    # rail_od_*: which ODs have either nonzero IVTHVY or IVTCOM (0s and 1s).
    # rail_crowding_od_*: crowding times between ODs (already multiplied by
    # 1.62); disconnected ODs have a 0 value.
    rail_od_path = _join(preprocess_dir, "rail_od_v9_trim_" + period.upper() + ".omx")
    crowd_path = _join(preprocess_dir, "rail_crowding_od_v9_trim_" + period.upper() + ".omx")

    # The context managers close both OMX files even if a step fails.
    with omx.open_file(rail_od_path) as df_od_pr, omx.open_file(crowd_path) as df_od_cwd:
        # Only for walk, transit, walk.
        df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_WLK')
        df_rail_od = df_rail_od[df_rail_od['rail_od'] > 0]

        df_rail_cwd = skim_core_to_df(df_od_cwd, 'WLK_TRN_WLK', cols=['orig', 'dest', 'crowd'])
        df_rail_cwd = df_rail_cwd.merge(df_rail_od, on=['orig', 'dest'], how='inner')

    df_rail_cwd['period'] = period
    connected_skims_by_period.append(df_rail_cwd.drop(columns=['rail_od']))

    # Use the baseline purely as an OD filter. Merging the whole baseline frame
    # on ['orig', 'dest'] would also bring in its other columns (period, mode,
    # ...) and suffix the overlapping names.
    baseline_ods = df_od_cwd_base.loc[df_od_cwd_base['mode'] == 'WLK_TRN_WLK', ['orig', 'dest']]
    df_rail_cwd = df_rail_cwd.merge(baseline_ods, on=['orig', 'dest'], how='inner')

    df_trips_wlk = df_trips_pd.merge(df_rail_cwd,
                                     left_on=['orig_taz', 'dest_taz'],
                                     right_on=['orig', 'dest'],
                                     how='inner')
    all_trips_by_period.append(df_trips_wlk)

df_connected_cwd_skim = pd.concat(connected_skims_by_period)
df_all_trips = pd.concat(all_trips_by_period)

Processing - AM
Walk, transit, walk.
Processing - MD
Walk, transit, walk.
Processing - PM
Walk, transit, walk.
Processing - EV
Walk, transit, walk.
Processing - EA
Walk, transit, walk.


In [41]:
# Preview the first rows of the all-mode trips joined to the walk-access crowding values.
df_all_trips.head(3)

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,trip_type,trips,transbay_od,orig_rdm_zones,orig_super_dist,orig_county,dest_rdm_zones,dest_super_dist,dest_county,home_zone,income,Income,pp_share,link21_trip_purp,Period,Mode,orig,dest,crowd,rail_od,period
0,2158751,5014996.0,0,Work,atwork,865,1155,9,1,1.0,INM,1.0,0.0,San Mateo_32,7,2,Santa Clara_23,10,3,1,117090,100k+,0.0,work,am,Auto_SOV,865,1155,19.337141,1.0,am
1,2366600,5621180.0,0,escort,work,865,1155,6,1,1.0,INM,1.0,0.0,San Mateo_32,7,2,Santa Clara_23,10,3,648,112510,100k+,0.0,work,am,Auto_SOV,865,1155,19.337141,1.0,am
2,2503773,5953500.0,0,escort,work,865,1155,8,2,1.0,INM,1.0,0.0,San Mateo_32,7,2,Santa Clara_23,10,3,745,32775,30k_to_60k,0.0,work,am,Auto_2Person,865,1155,19.337141,1.0,am


In [42]:
# To get total transit trips of a concept scenario, use the baseline script! Do not use this cell.
# x = df_all_trips.trips.sum()
# print(f'Total number of trips: {x:,}')

Total number of trips: 5,188,967.0


In [43]:
# Crowding exposure per record: trips times OD crowding time.
# NOTE(review): the /100 presumably converts `crowd` from hundredths of minutes to minutes — confirm.
df_all_trips['crowd_trips'] = df_all_trips['trips'].mul(df_all_trips['crowd']).div(100)

In [44]:
# A1.10.5 (all-trips variant): regional crowding per period, computed as
# summarised crowding (`crowd_trips`) divided by summarised trips (`trips`).
crowd_by_period = summarize_all_combinations(
    df_all_trips, groupby_columns=['Period'], summary_column='crowd_trips'
).rename(columns={'Value': 'crowd_trips'})

trips_by_period = summarize_all_combinations(
    df_all_trips, groupby_columns=['Period'], summary_column='trips'
).rename(columns={'Value': 'trips'})

region_value2 = crowd_by_period.merge(trips_by_period, on=['Period'], how='left')
region_value2['Value'] = region_value2['crowd_trips'] / region_value2['trips']

# Attach the label columns, then order the frame by the standard output layout.
region_value2 = region_value2[['Period', 'Value']].assign(**{
    'Concept_ID': concept_id,
    'Metric_ID': 'A1.10',
    'Metric_name': 'Crowding (Region)',
    'Submetric': 'A1.10.5',
    'Description': 'Regional crowding',
    'Population': 'Whole Population',
    'Geography': 'Regional',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Mode': '',
    'Income': '',
    'Zone_ID': '',
    'Units': 'minutes',
    'Total_Increment': '',
})
region_value2 = region_value2[perf_measure_columns]

In [45]:
# Export the all-trips variant of A1.10.5.
# Every file written by this cell carries the '_all_trips' suffix. Without it
# the per-submetric CSV had the same name as the one written by the rail-trip
# export cell earlier in the notebook and silently overwrote it on Run All.
all_trips_suffix = '_all_trips'
all_dfs = [region_value2]
metric_name = '_regional_crowding_'

for metric_df in all_dfs:
    metric_df = metric_df.reset_index(drop=True)[perf_measure_columns]
    # The first row supplies the labels that go into the file name.
    file_name = metric_df.loc[0, 'Submetric']
    geography = '_' + metric_df.loc[0, 'Geography'].replace(' ', '_')
    csv_name = file_name + metric_name + concept_id + geography + filename_extension + all_trips_suffix + '.csv'
    metric_df.to_csv(_join(summary_dir, csv_name), index=False)
    print(len(metric_df), file_name, metric_df.loc[0, 'Metric_name'])

# Combined file; at present it holds the same single frame as above.
# NOTE(review): the 'A10.1' prefix differs from the 'A1.10' metric id used
# elsewhere — confirm whether the file name is intentional.
combined_df = region_value2.reset_index(drop=True)
combined_df.to_csv(_join(summary_dir, 'A10.1' + '_regional_crowding_' + concept_id + '_region' + filename_extension + all_trips_suffix + '.csv'), index=False)

6 A1.10.5 Crowding (Region)
