In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
from pathlib import Path
from utility import *

import warnings
warnings.filterwarnings('ignore')

In [17]:
# Short aliases for the os.path helpers used to build paths in this notebook.
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# Every run setting comes from config.yaml, resolved against the current
# working directory. A missing key raises KeyError right here.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

# --- run identification ---
concept_id = params['concept_id']
iteration = params['iteration']

# --- input locations ---
model_outputs_dir = params['model_dir']
ctramp_dir = params['ctramp_dir']
skims_dir = _join(model_outputs_dir, "skims")
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

# --- lookup tables and category lists ---
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']
income_categories_bins = params['income_categories_bins']

# --- output layout ---
perf_measure_columns = params['final_columns']  # column order of every exported table
summary_dir = params['summary_dir']
filename_extension = params['filename_extension']

# --- expansion factors ---
annual_transit_factor = params['annual_transit_factor']
annual_auto_factor = params['annual_auto_factor']

In [3]:
# Sub-folders of the model output directory: demand matrices (with their
# transit sub-folder) and the transit / highway skim folders.
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir, transit_skims_dir, highway_skims_dir = (
    _join(demand_matrices_dir, "transit"),
    _join(skims_dir, "transit"),
    _join(skims_dir, "highway"),
)

In [4]:
# Load the trip roster stored as parquet in the pre-process folder.
trip_roster_path = _join(preprocess_dir, 'trip_roster.parquet')
df_trips = pd.read_parquet(trip_roster_path)

In [5]:
"""
toll_df = toll_df.rename(columns={'toll_revenue': 'Value'})
toll_df['Concept_ID'] = concept_id
toll_df['Metric_ID'] = 'F1.2'
toll_df['Metric_name'] = 'Toll revenue'
toll_df['Submetric'] = 'F1.2.1'
toll_df['Description'] = 'Toll revenue'
toll_df['Population'] = 'Whole Population'
toll_df['Geography'] = 'Region'
toll_df['Zone_ID'] = ''
toll_df['Origin_zone'] = ''
toll_df['Dest_zone'] = ''
toll_df['Units'] = '$'
toll_df['Total_Increment'] = ''

"""

"\ntoll_df = toll_df.rename(columns={'toll_revenue': 'Value'})\ntoll_df['Concept_ID'] = concept_id\ntoll_df['Metric_ID'] = 'F1.2'\ntoll_df['Metric_name'] = 'Toll revenue'\ntoll_df['Submetric'] = 'F1.2.1'\ntoll_df['Description'] = 'Toll revenue'\ntoll_df['Population'] = 'Whole Population'\ntoll_df['Geography'] = 'Region'\ntoll_df['Zone_ID'] = ''\ntoll_df['Origin_zone'] = ''\ntoll_df['Dest_zone'] = ''\ntoll_df['Units'] = '$'\ntoll_df['Total_Increment'] = ''\n\n"

In [6]:
## F1.1: Farebox revenues

In [7]:
# Keep only the trips whose trip_mode code is 6, 7 or 8.
# NOTE(review): presumably these are the transit mode codes — confirm against
# the mode mapping in config.yaml.
transit_trip_mode_codes = [6, 7, 8]
is_selected_mode = df_trips['trip_mode'].isin(transit_trip_mode_codes)
df_trn = df_trips[is_selected_mode]

In [8]:
# Run the utility helper over every time period and access/egress mode (it
# logs one line per period and per mode).
# NOTE(review): presumably this writes the rail_fair_v9_trim_<PERIOD>.omx files
# into preprocess_dir that the fare-merge loop reads — confirm in utility.py.
create_rail_fare_od_pairs(preprocess_dir, transit_skims_dir, acc_egg_modes, time_periods)

Period:  am
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  md
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  pm
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ev
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ea
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR


In [10]:
#df_od_pr = omx.open_file(_join(preprocess_dir, "rail_fair_v9_trim_" + period.upper() + ".omx")

In [11]:
def _merge_od_fare(trips, fare_od):
    """Inner-join trips to an OD fare table.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trip records carrying ``orig_taz`` and ``dest_taz`` columns.
    fare_od : pandas.DataFrame
        Fare table with ``orig``, ``dest`` and ``fare`` columns.

    Returns
    -------
    pandas.DataFrame
        ``trips`` with the fare columns attached. Because the join is an inner
        join, trips whose OD pair is missing from ``fare_od`` are dropped.
    """
    return pd.merge(trips, fare_od,
                    left_on=['orig_taz', 'dest_taz'],
                    right_on=['orig', 'dest'],
                    how='inner')


def _drive_access_fares(trips, skim_file, inbound_core, outbound_core):
    """Attach fares to PNR/KNR trips, choosing the skim core by trip direction.

    Trips with ``inbound == 1`` are matched against ``inbound_core``; all other
    trips are matched against ``outbound_core``.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trips of a single drive-access mode for a single period.
    skim_file : open OMX file
        Handle passed straight through to ``skim_core_to_df``.
    inbound_core, outbound_core : str
        Names of the skim cores holding the fares for each direction.

    Returns
    -------
    pandas.DataFrame
        Inbound rows followed by outbound rows, with a fresh index.
    """
    inbound_trips = trips[trips['inbound'] == 1]
    fare_od = skim_core_to_df(skim_file, inbound_core, cols=['orig', 'dest', 'fare'])
    inbound = _merge_od_fare(inbound_trips, fare_od)

    outbound_trips = trips[trips['inbound'] != 1]
    fare_od = skim_core_to_df(skim_file, outbound_core, cols=['orig', 'dest', 'fare'])
    outbound = _merge_od_fare(outbound_trips, fare_od)

    return pd.concat([inbound, outbound], ignore_index=True)


df_temp = []

for period in time_periods:
    print(f'processing - {period}')

    fare_file = omx.open_file(_join(preprocess_dir, "rail_fair_v9_trim_" + period.upper() + ".omx"))
    try:
        df_trn_pd = df_trn[df_trn['Period'] == period]

        # Walk transit: one skim core regardless of direction.
        df_rail_od = skim_core_to_df(fare_file, 'WLK_TRN_WLK', cols=['orig', 'dest', 'fare'])
        print(df_rail_od['fare'].mean())  # progress/sanity value: mean fare of the walk core
        df_trn_wlk = _merge_od_fare(df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT'], df_rail_od)

        # PNR transit: inbound trips use the walk-access / PNR-egress core,
        # every other trip uses the PNR-access / walk-egress core.
        df_trn_pnr = _drive_access_fares(df_trn_pd[df_trn_pd['Mode'] == 'PNR_TRANSIT'],
                                         fare_file, 'WLK_TRN_PNR', 'PNR_TRN_WLK')

        # KNR transit: same direction rule as PNR.
        df_trn_knr = _drive_access_fares(df_trn_pd[df_trn_pd['Mode'] == 'KNR_TRANSIT'],
                                         fare_file, 'WLK_TRN_KNR', 'KNR_TRN_WLK')

        df_temp.append(pd.concat([df_trn_wlk, df_trn_pnr, df_trn_knr], ignore_index=True))
    finally:
        # Release the file handle even if a core is missing or a merge fails;
        # previously one handle per period was left open.
        fare_file.close()

# Stack all periods. The index is not reset here, so it repeats across periods.
df_trn_rail = pd.concat(df_temp)

processing - am
393.7311
processing - md
371.50684
processing - pm
382.42435
processing - ev
354.28903
processing - ea
371.71173


In [12]:
# Revenue per record = trips x fare. Per the original note the fare is in
# cents, so dividing by 100 gives dollars and the 1.81 factor expresses the
# result in 2022 dollars.
fare_to_2022_factor = 1.81
cents_per_dollar = 100
fare_paid = df_trn_rail['trips'] * df_trn_rail['fare']
df_trn_rail['transit_revenue'] = fare_paid * fare_to_2022_factor / cents_per_dollar

In [32]:
# Summarise weekday farebox revenue (metric F1.1) by period at five
# geographies: region, transbay, county OD, RDM-zone OD and super-district OD.


def _farebox_summary(trips, submetric, description, geography,
                     orig_col=None, dest_col=None):
    """Sum ``transit_revenue`` by period, optionally by an origin/destination pair.

    Parameters
    ----------
    trips : pandas.DataFrame
        Records with ``Period`` and ``transit_revenue`` columns and, when
        given, the ``orig_col`` / ``dest_col`` columns.
    submetric, description, geography : str
        Constants written to the ``Submetric``, ``Description`` and
        ``Geography`` output columns.
    orig_col, dest_col : str, optional
        Zone columns to group by; they are renamed to ``Origin_zone`` and
        ``Dest_zone``. When either is omitted both output columns are blank.

    Returns
    -------
    pandas.DataFrame
        One row per group, restricted to and ordered by ``perf_measure_columns``.
    """
    by_zone_pair = orig_col is not None and dest_col is not None
    group_cols = ([orig_col, dest_col] if by_zone_pair else []) + ['Period']

    summary = trips.groupby(group_cols)['transit_revenue'].sum().reset_index()
    summary = summary.rename(columns={'transit_revenue': 'Value'})

    if by_zone_pair:
        summary = summary.rename(columns={orig_col: 'Origin_zone',
                                          dest_col: 'Dest_zone'})
    else:
        summary['Origin_zone'] = ''
        summary['Dest_zone'] = ''

    summary['Concept_ID'] = concept_id
    summary['Metric_ID'] = 'F1.1'
    summary['Metric_name'] = 'Farebox revenue'
    summary['Submetric'] = submetric
    summary['Description'] = description
    summary['Population'] = 'Whole Population'
    summary['Geography'] = geography
    summary['Zone_ID'] = ''
    summary['Purpose'] = ''
    summary['Units'] = '2022 dollars'
    summary['Total_Increment'] = ''
    return summary[perf_measure_columns]


# Regional value
region_value = _farebox_summary(
    df_trn_rail, 'F1.1.1',
    'Average weekday transit farebox revenue in the region',
    'Regional')

# Transbay: only records flagged transbay_od == 1
tb_value = _farebox_summary(
    df_trn_rail[df_trn_rail['transbay_od'] == 1], 'F1.1.2',
    'Average weekday transit farebox revenue between transbay OD pairs',
    'Transbay')

# County OD pairs
county_df_temp = _farebox_summary(
    df_trn_rail, 'F1.1.3',
    'Average weekday transit farebox revenue between origin and destination county',
    'County',
    orig_col='orig_county', dest_col='dest_county')

# RDM zone OD pairs
rdm_df_temp = _farebox_summary(
    df_trn_rail, 'F1.1.4',
    'Average weekday transit farebox revenue between origin and destination RDM zones',
    'RDM',
    orig_col='orig_rdm_zones', dest_col='dest_rdm_zones')

# Super district OD pairs.
# The description text used to be written to Geography (and the bare metric
# name to Description); both now follow the pattern of the other summaries.
# NOTE(review): the 'Super district' label is chosen by analogy with
# 'County' / 'RDM' — confirm the exact Geography code expected downstream.
sd_df_temp = _farebox_summary(
    df_trn_rail, 'F1.1.5',
    'Average weekday transit farebox revenue between origin and destination super district',
    'Super district',
    orig_col='orig_super_dist', dest_col='dest_super_dist')

In [33]:
# Export the weekday summaries: one CSV per summary, named after its Submetric
# code, followed by a single CSV that stacks all five.
all_dfs = [region_value, tb_value, county_df_temp, rdm_df_temp, sd_df_temp]
metric_name = 'weekday_farebox_revenue_'

for summary in all_dfs:
    summary = summary.reset_index(drop=True)[perf_measure_columns]
    # Each summary holds a single Submetric value, so the first row names the file.
    file_name = summary.loc[0, 'Submetric']
    csv_path = _join(summary_dir, metric_name + file_name + filename_extension + '.csv')
    summary.to_csv(csv_path, index=None)
    # Sanity line: row count, submetric, metric name and total revenue.
    print(len(summary), file_name, summary.loc[0, 'Metric_name'], summary['Value'].sum())

combined_df = pd.concat(all_dfs).reset_index(drop=True)
combined_path = _join(summary_dir, metric_name + 'F1.1' + filename_extension + '.csv')
combined_df.to_csv(combined_path, index=None)

5 F1.1.1 Farebox revenue 8125265.336965763
5 F1.1.2 Farebox revenue 2034610.7146440847
368 F1.1.3 Farebox revenue 8125265.336965762
127670 F1.1.4 Farebox revenue 8125265.336965762
4033 F1.1.5 Farebox revenue 8125265.336965762


In [27]:
# Display the annual_transit_factor value read from config.yaml; the next cell
# multiplies the weekday values by it.
annual_transit_factor

321.8

In [34]:
# Export the annual summaries: each weekday summary is scaled by
# annual_transit_factor and written to its own CSV (F1.1.6 onwards), then all
# scaled summaries are stacked into one combined CSV.
all_dfs = [region_value, tb_value, county_df_temp, rdm_df_temp, sd_df_temp]
metric_name = 'annual_farebox_revenue_'
first_annual_submetric = 6  # annual files are numbered F1.1.6, F1.1.7, ...

annual_dfs = []
for submetric_no, weekday_df in enumerate(all_dfs, start=first_annual_submetric):
    # Work on an explicit copy so the weekday frame is never modified and the
    # assignments below do not act on a slice of it.
    annual_df = weekday_df.reset_index(drop=True)[perf_measure_columns].copy()
    annual_df['Value'] = annual_df['Value'] * annual_transit_factor
    # Only descriptions containing 'Average weekday' are reworded; any other
    # description is left unchanged.
    annual_df['Description'] = annual_df['Description'].str.replace(
        'Average weekday', 'Annual', regex=False)

    # NOTE(review): the file is named with the annual submetric number, but the
    # Submetric column inside it still carries the weekday code (F1.1.1 ...).
    # Left unchanged here — confirm which code downstream consumers expect.
    file_name = 'F1.1.' + str(submetric_no)
    annual_df.to_csv(_join(summary_dir, metric_name + file_name + filename_extension + '.csv'), index=None)
    print(len(annual_df), file_name, annual_df['Metric_name'][0], annual_df['Value'].sum())
    annual_dfs.append(annual_df)

# Combine the SCALED frames. Previously the un-scaled weekday frames were
# concatenated, so the "annual" combined file held weekday values.
combined_df = pd.concat(annual_dfs).reset_index(drop=True)
combined_df.to_csv(_join(summary_dir, 'annual_farebox_revenue_' + 'F1.1' + filename_extension + '.csv'), index=None)

5 F1.1.6 Farebox revenue 2614710385.4355826
5 F1.1.7 Farebox revenue 654737727.9724665
368 F1.1.8 Farebox revenue 2614710385.4355826
127670 F1.1.9 Farebox revenue 2614710385.435582
4033 F1.1.10 Farebox revenue 2614710385.435582
