In [None]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the run settings. safe_load only builds plain Python data types from
# the YAML document.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

# Short aliases for the os.path helpers.
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_dir = params['summary_dir']
# Both names are kept; each is bound to the same 'summary_dir' config value.
summary_outputs = summary_dir

# run identifiers (each config key is read exactly once)
concept_id = params['concept_id']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

# output layout
perf_measure_columns = params['final_columns']
filename_extension = params['filename_extension']

In [None]:
def post_processing_station_paris(stns_df):
    """Count station pairs by time period and minimum number of transfers.

    Processing steps:

    1. Station names: in GTFS each station is represented by at least two
       platforms (e.g. northbound and southbound). The directional suffix
       is stripped from both stop-name columns so that platforms of the same
       station share one name.
    2. Time period: the GTFS processing uses six time-period labels.
       Overnight service is split across '1900-2600' (late-night trips that
       run past midnight) and '0000-0300'; both are mapped to 'EV' so they
       form a single overnight period. Rows whose ``time_period`` is not in
       the mapping get no ``Period`` and are left out by the group-by.
    3. Duplicates: several paths can exist for one station pair and period.
       Because this is a connectivity metric, only the lowest number of
       transfers per (origin, destination, period) is kept.
    4. Summarization: pairs needing two or more transfers are pooled into
       the label '2+', then pairs are counted per (Period, transfers).
       The original notes suggested 0/1/2/3+ buckets; this function pools
       at '2+'.

    The GTFS input may include high-speed-rail stations; they are NOT
    filtered out here (open question for the BC team).

    Parameters
    ----------
    stns_df : pandas.DataFrame
        Station-pair paths with columns 'stop_name.x', 'stop_name.y',
        'time_period' and 'transfers'. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        Columns 'Period', 'transfers' (numeric value or the string '2+')
        and 'Value' (number of station pairs in that bucket).
    """
    tod_dict = {'0300-0600': 'EA',
                '0600-1000': 'AM',
                '1000-1500': 'MD',
                '1500-1900': 'PM',
                '1900-2600': 'EV',
                # Early-morning half of the overnight period; the label
                # follows the pattern of the keys above.
                '0000-0300': 'EV'}

    name_columns = ['stop_name.x', 'stop_name.y']
    direction_suffixes = [' - Northbound', ' - Southbound',
                          ' - Westbound', ' - Eastbound']

    # Work on a copy of the argument: the caller's frame stays untouched and
    # the function no longer depends on a module-level variable.
    stations = stns_df.copy()

    for col in name_columns:
        for suffix in direction_suffixes:
            # Literal (non-regex) removal of the platform direction suffix.
            stations[col] = stations[col].str.replace(suffix, '', regex=False)

    stations['Period'] = stations['time_period'].map(tod_dict)

    # Best (lowest-transfer) connection per station pair and period.
    stn_pairs = (stations
                 .groupby(['stop_name.x', 'stop_name.y', 'Period'])['transfers']
                 .min()
                 .reset_index())

    #TO DO Ask BC team about removing HSR stations
    # Cast to object before writing the '2+' label so a string is never
    # assigned into a numeric column (deprecated silent upcast in pandas).
    two_or_more = stn_pairs['transfers'] >= 2
    stn_pairs['transfers'] = stn_pairs['transfers'].astype(object)
    stn_pairs.loc[two_or_more, 'transfers'] = '2+'
    stn_pairs['Value'] = 1

    trnsfers = stn_pairs.groupby(['Period', 'transfers'])['Value'].sum().reset_index()

    return trnsfers

In [None]:
def create_transfers_performance_measure(stn_pairs, concept_id):
    """Attach the fixed descriptor columns of performance measure A1.12.

    The 'transfers' column is renamed to 'Zone_ID' and the constant metadata
    columns are appended in the order listed below. The input frame is left
    unchanged because ``rename`` returns a new frame.

    Parameters
    ----------
    stn_pairs : pandas.DataFrame
        Summary table containing a 'transfers' column.
    concept_id : object
        Value written to the 'Concept_ID' column of every row.

    Returns
    -------
    pandas.DataFrame
        The renamed table with the metadata columns added.
    """
    # Column name -> constant value; dict order defines the column order.
    constant_fields = {
        'Concept_ID': concept_id,
        'Metric_ID': 'A1.12',
        'Metric_name': 'Network integration (stations)',
        'Submetric': '',
        'Description': 'Connectivity at different number of transfers. The Zone ID refers to number of transfers',
        'Population': 'Whole Population',
        'Geography': 'Regional',
        'Origin_zone': '',
        'Dest_zone': '',
        'Purpose': '',
        'Mode': '',
        'Income': '',
        'Units': 'Number of station pairs',
        'Total_Increment': '',
    }

    measure = stn_pairs.rename(columns={'transfers': 'Zone_ID'})
    for field, value in constant_fields.items():
        measure[field] = value

    return measure

In [None]:
#TO DO: change the path  and file name here
# Read the station-pair path table. The file name "R2R40.csv" is hard-coded
# and resolved relative to ctramp_dir (taken from config.yaml).
stns = pd.read_csv(_join(ctramp_dir, r"R2R40.csv"))  


# Count station pairs per time period and transfers bucket (0, 1, '2+').
stn_pairs = post_processing_station_paris(stns)


# summarize_all_combinations is not defined in this notebook; presumably it
# comes from `from utility import *` -- TODO confirm what rows it adds.
final_df = summarize_all_combinations(stn_pairs, 
                                      groupby_columns=['Period', 'transfers'], 
                                      summary_column='Value')

# Add the A1.12 descriptor columns, then keep only the columns listed under
# 'final_columns' in config.yaml, in that order.
final_df = create_transfers_performance_measure(final_df, concept_id)
final_df = final_df[perf_measure_columns]
#final_df

In [None]:
#combined_df.to_csv(_join(summary_dir, 'A3.3' + '_mode_shares_' + concept_id + '_region' + filename_extension+'.csv'), index=None)

In [None]:
# Write the A1.12 measure to summary_dir. The file name is built with an
# f-string so that concept_id / filename_extension values which YAML parsed
# as non-strings (e.g. a numeric id) are formatted instead of raising
# TypeError on '+'. index=False is the documented way to omit the row index.
final_df.to_csv(
    _join(summary_dir,
          f"A1.12_network_integration_{concept_id}_region{filename_extension}.csv"),
    index=False,
)