In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

income_categories_bins = params['income_categories_bins']


In [3]:
time_periods

['am', 'md', 'pm', 'ev', 'ea']

In [4]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
person_file = _join(ctramp_dir, 'main\\personData_' + str(iteration) + '.csv')
household_file = _join(ctramp_dir, 'main\\householdData_' + str(iteration) + '.csv')

person = pd.read_csv(person_file)

hh = pd.read_csv(household_file, usecols = ['hh_id', 'taz'])
hh = hh.rename(columns = {'taz': 'home_zone'})

#taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

#taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share 

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
highway_skims_dir = _join(skims_dir, "highway")

In [5]:
#trip roster
df_trips = create_trip_roster(ctramp_dir, hh, pp_perc ,transbay_od, geo_cwks, link21_purp_mapping, iteration)

NAs in PP Share: 0


In [6]:
df_trips['Period'] = df_trips['depart_hour'].map(time_period_mapping)
df_trips['Mode'] = df_trips['trip_mode'].map(mode_cat_mapping)

In [8]:
auto_trips = df_trips.loc[df_trips['trip_mode'].isin([1,2,3])]

In [9]:
time_periods

['am', 'md', 'pm', 'ev', 'ea']

In [14]:
hh_inc = pd.read_csv(household_file, usecols = ['hh_id', 'income'])

In [15]:
auto_trips = pd.merge(auto_trips, hh_inc , on = 'hh_id', how = 'left')

In [22]:
income_categories_bins

[0, 30000, 60000, 100000, 10000000000]

In [23]:
auto_trips['inc_grp'] = pd.cut(auto_trips['income'], bins = income_categories_bins, labels=['L', 'M', 'H', 'XH'])

In [24]:
auto_trips['inc_grp'].value_counts()

XH    1198963
H      747913
M      658032
L      443160
Name: inc_grp, dtype: int64

In [32]:
df_temp = []

for period in time_periods:
    print(f'processing - {period}')
    
    df_skim = omx.open_file(_join(highway_skims_dir, "HWYSKM" + period.upper() + ".omx"))
    auto_trips_pd = auto_trips[auto_trips['Period'] == period]
    
    sov_trips = auto_trips_pd[auto_trips_pd['Mode'] == 'Auto_SOV']
    hov_trips = auto_trips_pd[auto_trips_pd['Mode'] == 'Auto_2Person']
    hov2_trips = auto_trips_pd[auto_trips_pd['Mode'] == 'Auto_3+Person']
    
    
    all_trips = [(sov_trips, 'DA'), (hov_trips, 'S2'), (hov2_trips, 'S3')]
    
    for occ_trips in all_trips:
        
        for inc in ['L', 'M', 'H', 'XH']:
            
            print(inc, len(occ_trips[0]))
            trips_inc = occ_trips[0][occ_trips[0]['inc_grp'] == inc]
            
            toll_od1 = skim_core_to_df(df_skim, 'BTOLL'+occ_trips[1]+inc, cols=['orig', 'dest', 'btoll'])
            toll_od2 = skim_core_to_df(df_skim, 'BTOLL'+occ_trips[1]+inc, cols=['orig', 'dest', 'vtoll'])
            
            trips_inc = pd.merge(trips_inc, toll_od1, 
                                  left_on =['orig_taz', 'dest_taz'], 
                                  right_on=['orig', 'dest'], 
                                  how ='inner')
            
            trips_inc = pd.merge(trips_inc, toll_od2, 
                                  left_on =['orig_taz', 'dest_taz'], 
                                  right_on=['orig', 'dest'], 
                                  how ='inner')

            trips_inc['toll'] = trips_inc['btoll'] + trips_inc['vtoll']
    
        df_temp.append(trips_inc)

df_auto = pd.concat(df_temp)

processing - am
L 268950
M 268950
H 268950
XH 268950
L 132487
M 132487
H 132487
XH 132487
L 97673
M 97673
H 97673
XH 97673
processing - md
L 452385
M 452385
H 452385
XH 452385
L 249442
M 249442
H 249442
XH 249442
L 128525
M 128525
H 128525
XH 128525
processing - pm
L 493186
M 493186
H 493186
XH 493186
L 271160
M 271160
H 271160
XH 271160
L 172081
M 172081
H 172081
XH 172081
processing - ev
L 253717
M 253717
H 253717
XH 253717
L 135251
M 135251
H 135251
XH 135251
L 70628
M 70628
H 70628
XH 70628
processing - ea
L 40929
M 40929
H 40929
XH 40929
L 11821
M 11821
H 11821
XH 11821
L 5711
M 5711
H 5711
XH 5711


In [37]:
hwy_skim = omx.open_file(_join(highway_skims_dir, 'HWYSKMMD.omx'))
hwy_skim.list_matrices()

['BTOLLDAH',
 'BTOLLDAL',
 'BTOLLDAM',
 'BTOLLDAXH',
 'BTOLLS2H',
 'BTOLLS2L',
 'BTOLLS2M',
 'BTOLLS2XH',
 'BTOLLS3H',
 'BTOLLS3L',
 'BTOLLS3M',
 'BTOLLS3XH',
 'BTOLL_LRGLRGTRK',
 'BTOLL_MEDTRK',
 'BTOLL_SMLTRK',
 'BTOLL_VSMTRK',
 'COSTDAH',
 'COSTDAL',
 'COSTDAM',
 'COSTDAXH',
 'COSTLRGTRK',
 'COSTS2H',
 'COSTS2L',
 'COSTS2M',
 'COSTS2XH',
 'COSTS3H',
 'COSTS3L',
 'COSTS3M',
 'COSTS3XH',
 'COSTTRK',
 'DISTDAH',
 'DISTDAL',
 'DISTDAM',
 'DISTDAXH',
 'DISTLRGTRK',
 'DISTS2H',
 'DISTS2L',
 'DISTS2M',
 'DISTS2XH',
 'DISTS3H',
 'DISTS3L',
 'DISTS3M',
 'DISTS3XH',
 'DISTTRK',
 'FREEFLOWTIMEDAH',
 'FREEFLOWTIMEDAL',
 'FREEFLOWTIMEDAM',
 'FREEFLOWTIMEDAXH',
 'FREEFLOWTIMELRGTRK',
 'FREEFLOWTIMES2H',
 'FREEFLOWTIMES2L',
 'FREEFLOWTIMES2M',
 'FREEFLOWTIMES2XH',
 'FREEFLOWTIMES3H',
 'FREEFLOWTIMES3L',
 'FREEFLOWTIMES3M',
 'FREEFLOWTIMES3XH',
 'FREEFLOWTIMETRK',
 'GCTIMEDAH',
 'GCTIMEDAL',
 'GCTIMEDAM',
 'GCTIMEDAXH',
 'GCTIMELRGTRK',
 'GCTIMES2H',
 'GCTIMES2L',
 'GCTIMES2M',
 'GCTIMES2XH',
 'GCT

In [41]:
np.array(hwy_skim['BTOLLDAH'])>0

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [33]:
df_auto['toll_revenue'] = df_auto['trips']*df_auto['toll']

In [42]:
df_auto.shape

(1084112, 49)

In [34]:
toll_df = df_auto.groupby(['Mode'])['toll_revenue'].sum().reset_index()

In [35]:
toll_df = toll_df.rename(columns={'toll_revenue': 'Value'})
toll_df['Concept_ID'] = concept_id
toll_df['Metric_ID'] = 'F1.2'
toll_df['Metric_name'] = 'Toll revenue'
toll_df['Submetric'] = 'F1.2.1'
toll_df['Description'] = 'Toll revenue'
toll_df['Population'] = 'Whole Population'
toll_df['Geography'] = 'Region'
toll_df['Zone_ID'] = 'Megaregion'
toll_df['Origin_zone'] = ''
toll_df['Dest_zone'] = ''
toll_df['Units'] = '$'
toll_df['Total_Increment'] = ''

In [36]:
toll_df

Unnamed: 0,Mode,Value,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Geography,Zone_ID,Origin_zone,Dest_zone,Units,Total_Increment
0,Auto_2Person,16241110.0,BaseYear2015,F1.2,Toll revenue,F1.2.1,Toll revenue,Whole Population,Region,Megaregion,,,$,
1,Auto_3+Person,7406678.0,BaseYear2015,F1.2,Toll revenue,F1.2.1,Toll revenue,Whole Population,Region,Megaregion,,,$,
2,Auto_SOV,33957350.0,BaseYear2015,F1.2,Toll revenue,F1.2.1,Toll revenue,Whole Population,Region,Megaregion,,,$,


In [None]:
## F1.1: Farebox revenues

In [43]:
hwy_skim = omx.open_file(_join(transit_skims_dir, 'trnskmam_KNR_TRN_WLK.omx'))
hwy_skim.list_matrices()

['BOARDS',
 'CROWD',
 'DDIST',
 'DTIME',
 'FARE',
 'IVT',
 'IVTCOM',
 'IVTEXP',
 'IVTFRY',
 'IVTHVY',
 'IVTLOC',
 'IVTLRT',
 'IWAIT',
 'WACC',
 'WAIT',
 'WAUX',
 'WEGR',
 'XWAIT']

In [50]:
period

'ea'

In [10]:
def create_rail_od_pairs(transit_demand_dir, transit_skims_dir, acc_egg_modes, time_periods):
    
    #Creates the Rail OD eligible Files
    for per in time_periods:
        print("Period: ",per)

        rail_demand = omx.open_file(_join(transit_demand_dir, "rail_fair_v9_trim_" + per.upper() + ".omx"),'w') 
        for acc_egg in acc_egg_modes:
            print("Access Egress Mode: ",acc_egg)
            trn_skm = omx.open_file(_join(transit_skims_dir, "trnskm" + per.lower() +"_" + acc_egg.upper() + ".omx"))
            fares = np.array(trn_skm['FARE'])
            rail_demand[acc_egg] = fares

        rail_demand.close()

In [11]:
create_rail_od_pairs(transit_demand_dir, transit_skims_dir, acc_egg_modes, time_periods)

Period:  am
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  md
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  pm
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ev
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR
Period:  ea
Access Egress Mode:  WLK_TRN_WLK
Access Egress Mode:  KNR_TRN_WLK
Access Egress Mode:  PNR_TRN_WLK
Access Egress Mode:  WLK_TRN_PNR
Access Egress Mode:  WLK_TRN_KNR


In [None]:
df_temp = []

for period in time_periods:
    print(f'processing - {period}')
    
    df_od_skm = omx.open_file(_join(transit_demand_dir, "rail_fair_v9_trim_" + period.upper() + ".omx"))
    df_trn_pd = df_trn[df_trn['Period'] == period]
    
    #walk transit
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'WALK_TRANSIT']
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_WLK', cols=['orig', 'dest', 'fare'])
    df_trn_wlk = pd.merge(df_trn_acc, df_rail_od, 
                          left_on =['orig_taz', 'dest_taz'], 
                          right_on=['orig', 'dest'], 
                          how ='inner')
    
    # PNR Transit
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'PNR_TRANSIT']
    df_trn_acc_inbnd = df_trn_acc[df_trn_acc['inbound'] == 1] # returning home
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_PNR', cols=['orig', 'dest', 'fare'])
    df_trn_pnr_inb = pd.merge(df_trn_acc_inbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_acc_outbnd = df_trn_acc[df_trn_acc['inbound'] != 1] # returning home
    df_rail_od = skim_core_to_df(df_od_pr, 'PNR_TRN_WLK', cols=['orig', 'dest', 'fare'])
    df_trn_pnr_outbnd = pd.merge(df_trn_acc_outbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_pnr = pd.concat([df_trn_pnr_inb, df_trn_pnr_outbnd], ignore_index=True)
    
    # KNR Transit
    df_trn_acc = df_trn_pd[df_trn_pd['Mode'] == 'KNR_TRANSIT']
    df_trn_acc_inbnd = df_trn_acc[df_trn_acc['inbound'] == 1] # returning home
    df_rail_od = skim_core_to_df(df_od_pr, 'WLK_TRN_KNR', cols=['orig', 'dest', 'fare'])
    df_trn_knr_inb = pd.merge(df_trn_acc_inbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')


    df_trn_acc_outbnd = df_trn_acc[df_trn_acc['inbound'] != 1] # returning home
    df_rail_od = skim_core_to_df(df_od_pr, 'KNR_TRN_WLK', cols=['orig', 'dest', 'fare'])
    df_trn_knr_outbnd = pd.merge(df_trn_acc_outbnd, df_rail_od, 
                              left_on =['orig_taz', 'dest_taz'], 
                              right_on=['orig', 'dest'], how ='inner')

    df_trn_knr = pd.concat([df_trn_knr_inb, df_trn_knr_outbnd], ignore_index=True)
    
    df_trn_rail = pd.concat([df_trn_wlk, df_trn_pnr, df_trn_knr], ignore_index=True)
    df_temp.append(df_trn_rail)

df_trn_rail = pd.concat(df_temp)

In [None]:
df_trn_rail['fare'] = df_trn_rail['fare'].fillna(0)

In [None]:
df_trn_rail['transit_revenue'] = df_trn_rail['trips'] * df_trn_rail['fare']

In [None]:
# summarise for prioirty population



#regional value
region_value = df_temp.groupby(['Period'])['transit_revenue'].mean().reset_index()
region_value = region_value.rename(columns={'transit_revenue': 'Value'})
region_value['Concept_ID'] = concept_id
region_value['Metric_ID'] = 'F1.1'
region_value['Metric_name'] = 'Farebox revenue'
region_value['Submetric'] = 'F1.1.1'
region_value['Description'] = 'Farebox revenue'
region_value['Population'] = 'Whole Population'
region_value['Geography'] = 'Regional'
region_value['Orig_zone'] = ''
region_value['Dest_zone'] = ''
region_value['Zone_ID'] = 'Megaregion'
region_value['Units'] = 'minutes'
region_value = region_value[perf_measure_columns]

#county 
county_df_temp = df_trn_rail.groupby(['orig_county', 'dest_county', 'Period'])['transit_revenue'].mean().reset_index()
county_df_temp = county_df_temp.rename(columns={'transit_revenue': 'Value',
                                               'orig_county': 'Origin_zone',
                                               'dest_county': 'Dest_zone'})
county_df_temp['Concept_ID'] = concept_id
county_df_temp['Metric_ID'] = 'F1.1'
county_df_temp['Metric_name'] = 'Farebox revenue'
county_df_temp['Submetric'] = 'F1.1.2'
county_df_temp['Description'] = 'Farebox revenue'
county_df_temp['Population'] = 'Whole Population'
county_df_temp['Geography'] = 'County'
county_df_temp['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
county_df_temp['Units'] = 'minutes'
county_df_temp['Total_Increment'] = ''


#super district
sd_df_temp = df_trn_rail.groupby(['orig_super_dist', 'dest_super_dist', 'Period'])['transit_revenue'].mean().reset_index()
sd_df_temp = sd_df_temp.rename(columns={'transit_revenue': 'Value',
                                        'orig_super_dist': 'Origin_zone',
                                        'dest_super_dist': 'Dest_zone'})
sd_df_temp['Concept_ID'] = concept_id
sd_df_temp['Metric_ID'] = 'F1.1'
sd_df_temp['Metric_name'] = 'Farebox revenue'
sd_df_temp['Submetric'] = 'F1.1.3'
sd_df_temp['Description'] = 'Farebox revenue'
sd_df_temp['Population'] = 'Whole Population'
sd_df_temp['Geography'] = 'Super district'
sd_df_temp['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
sd_df_temp['Units'] = 'minutes'
sd_df_temp['Total_Increment'] = ''


#RDM Zones
rdm_df_temp = df_trn_rail.groupby(['orig_rdm_zones', 'dest_rdm_zones', 'Period'])['transit_revenue'].mean().reset_index()
rdm_df_temp = rdm_df_temp.rename(columns={'transit_revenue': 'Value',
                                        'orig_rdm_zones': 'Origin_zone',
                                        'dest_rdm_zones': 'Dest_zone'})
rdm_df_temp['Concept_ID'] = concept_id
rdm_df_temp['Metric_ID'] = 'F1.1'
rdm_df_temp['Metric_name'] = 'Farebox revenue'
rdm_df_temp['Submetric'] = 'F1.1.4'
rdm_df_temp['Description'] = 'Farebox revenue'
rdm_df_temp['Population'] = 'Whole Population'
rdm_df_temp['Geography'] = 'RDM'
rdm_df_temp['Zone_ID'] = ''
#df_cnty['Origin_zone'] = ''
#df_cnty['Dest_zone'] = ''
rdm_df_temp['Units'] = 'minutes'
rdm_df_temp['Total_Increment'] = ''




# transbay region
tb_value = df_trn_rail[df_trn_rail['transbay_od']==1]
tb_value = tb_value.groupby(['Period'])['trips'].mean().reset_index()

tb_value['Concept_ID'] = concept_id
tb_value['Metric_ID'] = 'F1.1'
tb_value['Metric_name'] = 'Farebox revenue'
tb_value['Submetric'] = 'F1.1.5'
tb_value['Description'] = 'Farebox revenue'
tb_value['Population'] = 'Whole Population'
tb_value['Geography'] = 'Transbay'
tb_value['Zone_ID'] = ''
tb_value['Origin_zone'] = ''
tb_value['Dest_zone'] = ''
tb_value['Units'] = 'minutes'
tb_value['Total_Increment'] = ''

In [None]:
df_region_period = df_trn_rail.groupby(['Period'])['transit_revenue'].sum().reset_index()

df_region_period = toll_df.rename(columns={'transit_revenue': 'Value'})
df_region_period['Concept_ID'] = concept_id
df_region_period['Metric_ID'] = 'F1.1'
df_region_period['Metric_name'] = 'Farebox revenue'
df_region_period['Submetric'] = 'F1.1.1'
df_region_period['Description'] = 'Farebox revenue'
df_region_period['Population'] = 'Whole Population'
df_region_period['Geography'] = 'Region'
df_region_period['Zone_ID'] = 'Megaregion'
df_region_period['Origin_zone'] = ''
df_region_period['Dest_zone'] = ''
df_region_period['Units'] = '$'
df_region_period['Total_Increment'] = ''

In [None]:
df_region_period = df_trn_rail.groupby(['Period'])['transit_revenue'].sum().reset_index()

df_region_period = toll_df.rename(columns={'transit_revenue': 'Value'})
df_region_period['Concept_ID'] = concept_id
df_region_period['Metric_ID'] = 'F1.1'
df_region_period['Metric_name'] = 'Farebox revenue'
df_region_period['Submetric'] = 'F1.1.1'
df_region_period['Description'] = 'Farebox revenue'
df_region_period['Population'] = 'Whole Population'
df_region_period['Geography'] = 'Region'
df_region_period['Zone_ID'] = 'Megaregion'
df_region_period['Origin_zone'] = ''
df_region_period['Dest_zone'] = ''
df_region_period['Units'] = '$'
df_region_period['Total_Increment'] = ''