In [None]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml
import geopandas as gpd

from utility import *

import warnings
warnings.filterwarnings('ignore')

In [None]:
!pip install --force-reinstall -v "openpyxl==3.1.0"

In [None]:
# Load the run configuration; every path and scenario identifier used in this
# notebook is read from config.yaml.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

_join = os.path.join

# paths
ctramp_dir = params['ctramp_dir']
summary_outputs = params['summary_dir']
model_dir = params['model_dir']
transit_data = params['transit_output_data']
preprocess_dir = _join(ctramp_dir, '_pre_process_files')

output_summary = _join(model_dir, 'output_summaries')
transit_dashboard = _join(preprocess_dir, 'transit_dashboard')

# scenario identifier and output layout (concept_id is read once; the original
# assigned it twice from the same key)
concept_id = params['concept_id']
perf_measure_columns = params['final_columns']
filename_extension = params['filename_extension']

# exist_ok=True replaces the check-then-create pattern, so re-running the cell
# (or a concurrent run creating the folder first) cannot raise FileExistsError.
os.makedirs(transit_dashboard, exist_ok=True)

In [None]:
# Integer code assigned to each time-period label.
time_period = {
    'AM': 1,
    'MD': 2,
    'PM': 3,
    'EV': 4,
    'EA': 5,
}

# Integer code assigned to each agency name found in the line crosswalk.
agency_mapping = {
    'BART': 1,
    'Caltrain': 2,
    'Capitol Corridor': 3,
    'Valley Link': 4,
    'ACE': 5,
}

#rail_agency = [36,38,39,43,44]

In [None]:
# Transit line crosswalk: the "Dashboard" sheet of the transit-lines workbook
# stored in the transit output data folder.
line_cwk_path = _join(transit_data, 'Link21 emme_tlines.2050_R39_R2_Run4.xlsx')
line_cwk = pd.read_excel(line_cwk_path, sheet_name="Dashboard")

In [None]:
# Inspection only: number of crosswalk rows per raw 'Agency' label (nothing is assigned).
line_cwk['Agency'].value_counts()

In [None]:
# Keep the four crosswalk fields used downstream and give them lower-case names.
line_cwk = line_cwk[['ID', 'MODE', '#time_peri', 'Agency']].rename(
    columns={
        'ID': 'line',
        'MODE': 'mode',
        '#time_peri': 'time_period',
        'Agency': 'agency',
    }
)

In [None]:
# Replace agency names with their integer codes (names missing from
# agency_mapping become NaN) and tag every row with the scenario id.
line_cwk = line_cwk.assign(
    agency=line_cwk['agency'].map(agency_mapping),
    scenario=concept_id,
)

In [None]:
# Agency codes present in the crosswalk, used later to keep only rail segments.
# Agencies that were not in agency_mapping are NaN at this point; they must be
# dropped because Series.isin treats NaN as a match, which would let segments
# with no crosswalk entry (agency NaN after the left merge) through the filter.
rail_agency = line_cwk['agency'].dropna().unique().tolist()
rail_agency

In [None]:
# Build `all_segments`: one row per rail transit segment with volume, boardings,
# alightings, capacities, crowding, transfers and direction, plus `all_trnfrs`
# (total transfers per segment id).

# Input folders: segment volume CSVs ('Segment*') and transfer summaries
# ('transit_segment*').
segment_volume = transit_data
print(segment_volume)

segment_transfers = output_summary
print(segment_transfers)

# Sorted so the concatenation order does not depend on os.listdir's arbitrary ordering.
segmentfiles = sorted(f for f in os.listdir(segment_volume) if f.startswith('Segment'))
segmentfiles_transfers = sorted(
    f for f in os.listdir(segment_transfers) if f.startswith('transit_segment')
)

segment_cols = ['line', 'segment', 'volume', 'board', 'alight',
                'totalcapacity', 'periodcapacity', 'stop']

all_segments = []
for file in segmentfiles:
    print(file)
    segment = pd.read_csv(_join(segment_volume, file))
    segment.columns = segment.columns.str.lower()
    segment = segment.rename(columns={'estperiodcapacity': 'periodcapacity'})
    # Drop lines whose id starts with 'pnr'.
    segment = segment[~segment['line'].astype(str).str.startswith('pnr')]
    segment = segment[segment_cols]
    # 'segment' is "<i>-<j>"; split it into its two station ids (kept as strings).
    segment[['i_station', 'j_station']] = segment['segment'].str.split("-", expand=True)
    all_segments.append(segment)

# Transfer-boarding components that are summed into a single 'transfers' value.
transfer_cols = [
    'direct_transfer_board_ptw',
    'direct_transfer_board_wtp',
    'direct_transfer_board_ktw',
    'direct_transfer_board_wtk',
    'direct_transfer_board_wtw',
    'auxiliary_transfer_board_ptw',
    'auxiliary_transfer_board_wtp',
    'auxiliary_transfer_board_ktw',
    'auxiliary_transfer_board_wtk',
]

all_trnfrs = []
for file in segmentfiles_transfers:
    print(file)
    # Read into its own name; the original also rebound `segment` here.
    trnfrs = pd.read_csv(_join(segment_transfers, file))
    trnfrs.columns = trnfrs.columns.str.lower()
    trnfrs = trnfrs[~trnfrs['line'].astype(str).str.startswith('pnr')]
    # The 'line' column of these files is used as the segment id for the later merge.
    trnfrs = trnfrs.rename(columns={'line': 'seg_id'})

    # Columns must be selected with a list: selecting with a bare tuple of
    # names raises in pandas >= 2.0.
    trnfrs = trnfrs.groupby('seg_id')[transfer_cols].sum()

    trnfrs['transfers'] = trnfrs.sum(axis=1)
    trnfrs = trnfrs[['transfers']].reset_index()

    all_trnfrs.append(trnfrs)

all_trnfrs = pd.concat(all_trnfrs, ignore_index=True)
all_trnfrs['scenario'] = concept_id

all_trnfrs = all_trnfrs[['seg_id', 'scenario', 'transfers']]

all_segments = pd.concat(all_segments, ignore_index=True)
# Attach mode, time period, agency and scenario from the line crosswalk, then
# keep only segments whose agency is one of the rail agencies.
all_segments = pd.merge(all_segments, line_cwk, on=['line'], how='left')
all_segments = all_segments[all_segments['agency'].isin(rail_agency)]
all_segments['time_period'] = all_segments['time_period'].map(time_period)
all_segments['crowding'] = all_segments['volume'] / all_segments['periodcapacity']

all_segments['volume'] = all_segments['volume'].round()
all_segments['board'] = all_segments['board'].round()
all_segments['alight'] = all_segments['alight'].round()

# Segment key "<line>-<i_station>-<j_station>", matched against the shapefile
# SEG_ID and the transfer summaries.
all_segments['seg_id'] = [
    f'{line}-{i_stn}-{j_stn}'
    for line, i_stn, j_stn in zip(all_segments['line'],
                                  all_segments['i_station'],
                                  all_segments['j_station'])
]

print(all_segments.columns)
all_segments = all_segments.drop(columns=['segment'])
print(all_trnfrs.columns)
all_segments = pd.merge(all_segments, all_trnfrs, on=['seg_id', 'scenario'], how='left')

#all_segments = all_segments.dropna()
# direction is 0 when the line id contains "d0", otherwise 1.
all_segments['direction'] = np.where(all_segments['line'].str.contains("d0"), 0, 1)

# Target dtypes. Not applied: 'headway' and 'agency_mode' are not columns of
# all_segments as built in this cell, so astype(dtype) would raise a KeyError.
dtype = {#'i_station' : 'int64',
         #'j_station' : 'int64',
         'volume' : 'int32',
         'board' : 'int32',
         'alight' : 'int32',
         'totalcapacity': 'float32',
         'periodcapacity': 'float32',
         'headway': 'float32',
         'time_period': 'int16',
         'agency': 'int16',
         'agency_mode': 'int16',
         'scenario' : 'int16',
         'crowding' : 'float32',
         'stop':'int64',
         'direction': 'int16'
}
#all_segments = all_segments.astype(dtype)

In [None]:
# Preview only the first rows; rendering the whole segment table bloats the notebook output.
all_segments.head()

In [None]:
%%time

# add shapefiles 

print(f'reading transit segments file')
seg_shp = gpd.read_file(_join(transit_data, 'emme_tsegs.shp'))
seg_shp = seg_shp[['SEG_ID', 'SEG_NUM', 'geometry']]
seg_shp.columns = seg_shp.columns.str.lower()
#seg_shp = seg_shp.to_crs("4326")
seg_shp.columns = ['seg_id', 'seg_num', 'geometry']

#merge with segment geometry
seg_final = pd.merge(seg_shp, all_segments, on='seg_id', how='right')
seg_final['scenario'] = concept_id

map_ids = list(seg_final['stop'].unique())

print(f'reading transit nodes file')
node_shp = gpd.read_file(_join(transit_data, 'emme_nodes.shp'))
node_shp = node_shp[['ID', 'geometry']]
node_shp.columns = node_shp.columns.str.lower()
#node_shp = node_shp.to_crs("4326")
node_shp['scenario'] = concept_id
node_shp = node_shp[node_shp['id'].isin(map_ids)]

print("Writing segment file")
seg_final['seg_ij'] = seg_final['i_station'] + '_' + seg_final['j_station']
seg_geo = seg_final[['seg_ij', 'agency', 'time_period', 'scenario', 'geometry']]
attrs = ['seg_ij', 'agency', 'time_period', 'scenario']
seg_geo_shp = seg_geo.dissolve(by=attrs, as_index=False)
seg_geo_shp = seg_geo_shp[['seg_ij', 'scenario', 'geometry']]
seg_geo_shp = seg_geo_shp.groupby(['seg_ij', 'scenario']).first().reset_index()
seg_geo_shp.to_file(_join(transit_dashboard, 'tdb_segments_' + concept_id + '.shp'))

print("Writing node file")
node_shp.to_file(_join(transit_dashboard, 'tdb_node_' + concept_id +  '.shp'))

In [None]:
# Preview only the first rows; rendering the whole segment table bloats the notebook output.
all_segments.head()

In [None]:
# Reduce the segment table to the dashboard fields, drop exact duplicate rows
# and add the "<i_stop>_<j_stop>" key.
seg_geo_tdb = all_segments.assign(
    id=all_segments['i_station'] + '_' + all_segments['j_station']
)

tdb_fields = ['i_station', 'j_station', 'agency', 'time_period', 'direction', 'stop',
              'volume', 'periodcapacity', 'scenario', 'board', 'alight', 'transfers']
seg_tdb = seg_geo_tdb[tdb_fields]
seg_tdb1 = seg_tdb.drop_duplicates()
# Row counts before and after de-duplication.
print(len(seg_tdb), len(seg_tdb1))

seg_tdb1 = (
    seg_tdb1
    .rename(columns={'i_station': 'i_stop', 'j_station': 'j_stop'})
    .assign(seg_ij=lambda frame: frame['i_stop'] + '_' + frame['j_stop'])
)

In [None]:
# Total volume and period capacity per station pair, agency, time period and scenario.
# The value columns are selected with a list: selecting them with a bare tuple
# of names raises in pandas >= 2.0.
seg_tdb = (
    seg_tdb1
    .groupby(['i_stop', 'j_stop', 'seg_ij', 'agency', 'time_period', 'scenario'])
    [['volume', 'periodcapacity']]
    .sum()
    .reset_index()
)

# Station ids were split out of a string column; cast them to integers so they
# can be joined against the numeric 'stop' ids.
seg_tdb['i_stop'] = seg_tdb['i_stop'].astype('int64')
seg_tdb['j_stop'] = seg_tdb['j_stop'].astype('int64')

In [None]:
#stops dashboard
# Alightings, boardings and transfers per stop, agency, time period and scenario.
# The value columns are selected with a list: selecting them with a bare tuple
# of names raises in pandas >= 2.0.
stops = (
    seg_tdb1
    .groupby(['stop', 'agency', 'time_period', 'scenario'])
    [['alight', 'board', 'transfers']]
    .sum()
    .reset_index()
)

# Cast the stop id itself to int64. The original cast a non-existent 'i_stop'
# column (KeyError); 'stop' is the key later merged against the int64 'i_stop'
# of seg_tdb, so the two dtypes have to agree.
stops['stop'] = stops['stop'].round().astype('int64')

In [None]:
# Attach the stop-level totals of each segment's origin stop (i_stop) and
# derive the transfer share and crowding ratio.
seg_tdb_results = pd.merge(
    seg_tdb,
    stops,
    how='left',
    left_on=['i_stop', 'agency', 'time_period', 'scenario'],
    right_on=['stop', 'agency', 'time_period', 'scenario'],
).assign(
    # Share of boardings that are transfers; 0 where there are no boardings.
    perc_transfers=lambda frame: np.where(frame['board'] > 0,
                                          frame['transfers'] / frame['board'],
                                          0),
    direction=1,
    # Segments without a matching stop row get the placeholder id 999999.
    stop=lambda frame: frame['stop'].fillna(999999).astype("int64"),
    crowding=lambda frame: frame['volume'] / frame['periodcapacity'],
)

In [None]:
# Write the two dashboard tables for this scenario into the transit dashboard folder.
results_parquet = "transit_ridership_results_" + concept_id + ".parquet"
comparison_parquet = "transit_ridership_comparison_" + concept_id + ".parquet"

seg_tdb_results.to_parquet(_join(transit_dashboard, results_parquet))
stops.to_parquet(_join(transit_dashboard, comparison_parquet))

In [None]:
# Integer time-period code -> label, numbered from 1 in the order listed.
time_period_map = dict(enumerate(('AM', 'MD', 'PM', 'EV', 'EA'), start=1))

In [None]:
# Summarise total boardings and alightings by station id and time period.

# Convert period codes to labels in place (the segment summary further down
# reads the labelled seg_tdb). Values without an entry in time_period_map are
# left untouched, so re-running this cell keeps the labels instead of turning
# them into NaN.
stops['time_period'] = stops['time_period'].map(
    lambda code: time_period_map.get(code, code))
seg_tdb['time_period'] = seg_tdb['time_period'].map(
    lambda code: time_period_map.get(code, code))

# Boardings per stop and period, keeping only stops with boardings.
# (The original also renamed a 'Zone_ID' column here, which does not exist at
# this point, so the call had no effect and is dropped.)
trn_stn_br = stops.groupby(['stop', 'time_period'])['board'].sum().reset_index()
trn_stn_br = trn_stn_br.loc[trn_stn_br['board'] > 0]

# Alightings per stop and period, keeping only stops with alightings.
trn_stn_al = stops.groupby(['stop', 'time_period'])['alight'].sum().reset_index()
trn_stn_al = trn_stn_al.loc[trn_stn_al['alight'] > 0]

In [None]:
# Put station boardings into the performance-measure layout (submetric A3.2.1).
trn_stn_br = (
    trn_stn_br
    .rename(columns={'stop': 'Zone_ID', 'board': 'Value', 'time_period': 'Period'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.2',
        Metric_name='Transit Ridership',
        Submetric='A3.2.1',
        Description='Boardings at only rail stations. Zone ID represents the station nodes',
        Population='Whole Population',
        Geography='Regional',
        Origin_zone='',
        Dest_zone='',
        Purpose='',
        Mode='',
        Income='',
        Units='Boardings',
        Total_Increment='',
    )
)

# Keep only the configured output columns, in the configured order.
trn_stn_br = trn_stn_br[perf_measure_columns]

In [None]:
# Put station alightings into the performance-measure layout (submetric A3.2.2).
trn_stn_al = (
    trn_stn_al
    .rename(columns={'stop': 'Zone_ID', 'alight': 'Value', 'time_period': 'Period'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.2',
        Metric_name='Transit Ridership',
        Submetric='A3.2.2',
        Description='Alightings at only rail stations. Zone ID represents the station nodes',
        Population='Whole Population',
        Geography='Regional',
        Origin_zone='',
        Dest_zone='',
        Purpose='',
        Mode='',
        Income='',
        Units='Alightings',
        Total_Increment='',
    )
)

# Keep only the configured output columns, in the configured order.
trn_stn_al = trn_stn_al[perf_measure_columns]

In [None]:
# Total volume per origin stop, destination stop and time period (summed over agencies).
trn_seg = seg_tdb.groupby(['i_stop', 'j_stop', 'time_period'], as_index=False)['volume'].sum()

In [None]:
# Keep only station pairs that carry a positive volume.
carries_volume = trn_seg['volume'] > 0
trn_seg = trn_seg[carries_volume]

In [None]:
# Put station-to-station volumes into the performance-measure layout (submetric A3.2.3).
trn_seg = (
    trn_seg
    .rename(columns={'i_stop': 'Origin_zone',
                     'j_stop': 'Dest_zone',
                     'volume': 'Value',
                     'time_period': 'Period'})
    .assign(
        Concept_ID=concept_id,
        Metric_ID='A3.2',
        Metric_name='Transit Ridership',
        Submetric='A3.2.3',
        Description='Flows between origin and destination stations',
        Population='Whole Population',
        Geography='Regional',
        Purpose='',
        Units='',
        Mode='',
        Income='',
        Zone_ID='',
        Total_Increment='',
    )
)

# Keep only the configured output columns, in the configured order.
trn_seg = trn_seg[perf_measure_columns]

In [None]:
# Inspection only: confirm the segment table now has the configured output columns.
trn_seg.columns

In [None]:
# Weekday station boardings (A3.2.1) -> CSV in the summary output folder.
weekday_boardings_csv = ('A3.2.1' + '_weekday_transit_boardings_'
                         + concept_id + '_region' + filename_extension + '.csv')
trn_stn_br.to_csv(_join(summary_outputs, weekday_boardings_csv), index=False)

In [None]:
# Weekday station alightings (A3.2.2) -> CSV in the summary output folder.
weekday_alightings_csv = ('A3.2.2' + '_weekday_transit_alightings_'
                          + concept_id + '_region' + filename_extension + '.csv')
trn_stn_al.to_csv(_join(summary_outputs, weekday_alightings_csv), index=False)

In [None]:
# Weekday station-to-station volumes (A3.2.3) -> CSV in the summary output folder.
weekday_segments_csv = ('A3.2.3' + '_weekday_transit_segment_volumes_'
                        + concept_id + '_region' + filename_extension + '.csv')
trn_seg.to_csv(_join(summary_outputs, weekday_segments_csv), index=False)

In [None]:
# Annual versions of the three weekday metrics: each Value is multiplied by the
# configured annual transit factor and the submetric id is replaced.
annual_transit_factor = params['annual_transit_factor']

trn_stn_br_annual = trn_stn_br.copy()
trn_stn_br_annual['Value'] = trn_stn_br_annual['Value'] * annual_transit_factor
trn_stn_br_annual['Submetric'] = 'A3.2.4'

trn_stn_al_annual = trn_stn_al.copy()
trn_stn_al_annual['Value'] = trn_stn_al_annual['Value'] * annual_transit_factor
trn_stn_al_annual['Submetric'] = 'A3.2.5'

# Start from the segment volumes (trn_seg). The original copied trn_stn_br, so
# the A3.2.6 "segment volumes" output actually contained station boardings.
trn_seg_annual = trn_seg.copy()
trn_seg_annual['Value'] = trn_seg_annual['Value'] * annual_transit_factor
trn_seg_annual['Submetric'] = 'A3.2.6'

In [None]:
# Annual station boardings (A3.2.4) -> CSV in the summary output folder.
annual_boardings_csv = ('A3.2.4' + '_annual_transit_boardings_'
                        + concept_id + '_region' + filename_extension + '.csv')
trn_stn_br_annual.to_csv(_join(summary_outputs, annual_boardings_csv), index=False)

In [None]:
# Annual station alightings (A3.2.5) -> CSV in the summary output folder.
annual_alightings_csv = ('A3.2.5' + '_annual_transit_alightings_'
                         + concept_id + '_region' + filename_extension + '.csv')
trn_stn_al_annual.to_csv(_join(summary_outputs, annual_alightings_csv), index=False)

In [None]:
# Annual segment-volume table (A3.2.6) -> CSV in the summary output folder.
annual_segments_csv = ('A3.2.6' + '_annual_transit_segment_volumes_'
                       + concept_id + '_region' + filename_extension + '.csv')
trn_seg_annual.to_csv(_join(summary_outputs, annual_segments_csv), index=False)