In [1]:
import os
import pandas as pd
import numpy as np
import openmatrix as omx
import random
import yaml

from utility import *

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides pandas FutureWarning/DeprecationWarning messages,
# so removed APIs will only show up as hard errors after an upgrade.
warnings.filterwarnings('ignore')

In [2]:
# Load the run configuration from config.yaml.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

# short aliases for the os.path helpers
_join = os.path.join
_dir = os.path.dirname
_norm = os.path.normpath

# paths
model_outputs_dir = params['model_dir']
skims_dir = _join(model_outputs_dir, "skims")
summary_outputs = params['summary_dir']
ctramp_dir = params['ctramp_dir']
iteration = params['iteration']

# concept label, mappings and lists taken from the config
# (concept_id is read once here; it was previously assigned twice)
concept_id = params['concept_id']
time_period_mapping = params['time_periods_mapping']
link21_purp_mapping = params['purpose_mapping']
mode_cat_mapping = params['mode_mapping']
time_periods = params['periods']
acc_egg_modes = params['access_egress_modes']

In [3]:
# outputs of CT-RAMP model for tour and trip file
household_model_dir = _join(model_outputs_dir, "main")

# input household and person data
# Each path component is passed to os.path.join on its own rather than embedding a
# Windows-only "\\" separator in the file name, so the paths also resolve on Linux/macOS.
person_file = _join(ctramp_dir, 'main', f'personData_{iteration}.csv')
household_file = _join(ctramp_dir, 'main', f'householdData_{iteration}.csv')

person = pd.read_csv(person_file)

# only the household id and its TAZ are read; the TAZ column is relabelled as the home zone
hh = pd.read_csv(household_file, usecols=['hh_id', 'taz'])
hh = hh.rename(columns={'taz': 'home_zone'})

# taz to RDM zones, super districts, county
geo_cwks = pd.read_csv(_join(params['common_dir'], "geographies.csv")) #columns taz, rdm_zones, super_district, county

# taz to priority population
pp_perc = pd.read_excel(_join(params['common_dir'], "TAZ_Tract_cwk_summary.xlsx")) #columns = taz, pp_share

# transbay od pairs
transbay_od = pd.read_csv(_join(params['common_dir'], "transbay_od.csv")) #columns = transbay_o, transbay_d

# demand matrix and skim folders derived from the model output folder
demand_matrices_dir = _join(model_outputs_dir, "demand_matrices")
transit_demand_dir = _join(demand_matrices_dir, "transit")
transit_skims_dir = _join(skims_dir, "transit")
best_path_skim_dir = params['best_path_skim_dir']

perf_measure_columns = params['final_columns']

In [4]:
# folder containing the transit output files that are listed and read in the cells below
transit_data = params['transit_output_data']

In [5]:
import os
import pandas as pd
#from tidyr import separate
#from dplyr import select, filter

modeloutput_folder = transit_data #"output_data_transit/"
outputtables_folder = "OutputTables/"

input_Excel = "LINK21_TransitValidation_2015 DRAFT template.xlsx"
output_Excel = "LINK21_TransitValidation_2015 DRAFT v04102023_CS_Run1b_INC.xlsx"

# List the folder once and sort the names: os.listdir returns entries in arbitrary
# order, and the read order fixes the row order of the tables concatenated later.
_model_output_files = sorted(os.listdir(modeloutput_folder))

# one file per time period for each of the three export types, matched by name prefix
segmentfiles = [f for f in _model_output_files if f.startswith('Segment')]
linefiles = [f for f in _model_output_files if f.startswith('Transit summary')]
linemodefiles = [f for f in _model_output_files if f.startswith('Transit lines_')]

In [6]:
# show the files whose names start with 'Segment'
segmentfiles

['Segment results_am.csv',
 'Segment results_ea.csv',
 'Segment results_ev.csv',
 'Segment results_md.csv',
 'Segment results_pm.csv']

In [7]:
# show the files whose names start with 'Transit summary'
linefiles

['Transit summary by line_am.csv',
 'Transit summary by line_ea.csv',
 'Transit summary by line_ev.csv',
 'Transit summary by line_md.csv',
 'Transit summary by line_pm.csv']

In [8]:
# show the files whose names start with 'Transit lines_'
linemodefiles

['Transit lines_onlymodeandsrcmode_am.csv',
 'Transit lines_onlymodeandsrcmode_ea.csv',
 'Transit lines_onlymodeandsrcmode_ev.csv',
 'Transit lines_onlymodeandsrcmode_md.csv',
 'Transit lines_onlymodeandsrcmode_pm.csv']

In [9]:
print("*****Reading Segment Results Files*****")

# names for the first 10 comma-separated fields of every record
segment_columns = ["Line", "Segment", "Length", "Time", "Speed", "Loadfactor", "Volume", "Stop", "Board", "Alight"]
segment_frames = []  # one cleaned frame per file, concatenated once after the loop

for ctr, file in enumerate(segmentfiles, start=1):
    print(f"....Reading {file} ({ctr} of {len(segmentfiles)})")
    with open(_join(modeloutput_folder, file)) as f:
        txt = f.readlines()[1:] # skip first line

    # Strip the line terminator before splitting; otherwise a record with exactly
    # 10 fields keeps "\n" glued to its last value, and a blank line ("\n") looks
    # like a non-empty field.
    txt = [x.rstrip('\r\n').split(',')[:10] for x in txt]
    # keep only complete records: all 10 fields present and none of them empty
    txt = [x for x in txt if len(x) == len(segment_columns) and all(len(e) > 0 for e in x)]

    segment1 = pd.DataFrame(txt, columns=segment_columns)
    segment1 = segment1[["Line", "Segment", "Volume", "Stop", "Board", "Alight"]]
    # the line name carries six "_"-separated parts; n=5 keeps any further "_" inside the last part
    segment1[["Mode", "Agency", "GTFS_Routeid", "Timeperiod", "Direction", "Segid"]] = segment1["Line"].str.split(pat="_", n=5, expand=True)
    # drop records whose first name part is "pnr"
    segment1 = segment1[segment1["Mode"] != "pnr"]
    segment_frames.append(segment1)

# Concatenate once instead of growing the frame inside the loop (which re-copies all
# previous rows on every iteration). An empty frame is kept when no file was found.
segment_final = pd.concat(segment_frames, ignore_index=True) if segment_frames else pd.DataFrame()

*****Reading Segment Results Files*****
....Reading Segment results_am.csv (1 of 5)
....Reading Segment results_ea.csv (2 of 5)
....Reading Segment results_ev.csv (3 of 5)
....Reading Segment results_md.csv (4 of 5)
....Reading Segment results_pm.csv (5 of 5)


In [10]:
print("*****Reading Transit Summary by Line Files*****")

# names for the first 10 comma-separated fields of every record
linesum_columns = ["Line", "del", "Modechar", "veh", "nveh1", "nveh2",
                   "Headway", "Length", "Time", "Passengers"]
linesum_frames = []  # one cleaned frame per file, concatenated once after the loop

for ctr, file in enumerate(linefiles, start=1):
    print(f"....Reading {file} ({ctr} of {len(linefiles)})")

    # the export is malformed, so it is parsed line by line instead of with pd.read_csv
    with open(_join(modeloutput_folder, file)) as f:
        txt = f.readlines()[1:] # skip first line

    # Blank lines have to be dropped before splitting: "".split(',') still returns
    # [''], so a length check applied after the split can never remove anything.
    # The line terminator is stripped so it cannot end up inside the last kept field
    # (Passengers) when a record has exactly 10 fields.
    txt = [x.rstrip('\r\n').split(',')[:10] for x in txt if x.strip()]
    linesum1 = pd.DataFrame(txt, columns=linesum_columns)

    linesum1 = linesum1[["Line", "Modechar", "Passengers"]]
    # the line name carries six "_"-separated parts; n=5 keeps any further "_" inside the last part
    linesum1[["Modenum", "Agency", "GTFS_Routeid", "Timeperiod", "Direction", "Segid"]] = linesum1["Line"].str.split("_", expand=True, n=5)

    # "pnr" records are removed first because their first name part is not numeric
    linesum1 = linesum1[linesum1["Modenum"] != "pnr"]
    linesum1 = linesum1.astype({"Modenum": int})

    linesum_frames.append(linesum1)

# Concatenate once instead of growing the frame inside the loop.
# An empty frame is kept when no file was found.
linesum_final = pd.concat(linesum_frames, ignore_index=True) if linesum_frames else pd.DataFrame()

*****Reading Transit Summary by Line Files*****
....Reading Transit summary by line_am.csv (1 of 5)
....Reading Transit summary by line_ea.csv (2 of 5)
....Reading Transit summary by line_ev.csv (3 of 5)
....Reading Transit summary by line_md.csv (4 of 5)
....Reading Transit summary by line_pm.csv (5 of 5)


In [11]:
print("*****Reading Transit Line Files with Original Mode*****")

linemode_frames = []  # one two-column frame per file
for ctr, file in enumerate(linemodefiles, start=1):
    print(f"....Reading {file} ({ctr} of {len(linemodefiles)})")
    linemodesum1 = pd.read_csv(_join(modeloutput_folder, file))
    # keep the first and third columns by position and give them fixed names
    linemodesum1 = linemodesum1.iloc[:, [0, 2]]
    linemodesum1.columns = ["Line", "Orig_Mode"]
    linemode_frames.append(linemodesum1)

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement and is what
# the other readers in this notebook already use. The per-file index is left untouched,
# as append did. An empty frame is kept when no file was found.
linemodesum_final = pd.concat(linemode_frames) if linemode_frames else pd.DataFrame()

*****Reading Transit Line Files with Original Mode*****
....Reading Transit lines_onlymodeandsrcmode_am.csv (1 of 5)
....Reading Transit lines_onlymodeandsrcmode_ea.csv (2 of 5)
....Reading Transit lines_onlymodeandsrcmode_ev.csv (3 of 5)
....Reading Transit lines_onlymodeandsrcmode_md.csv (4 of 5)
....Reading Transit lines_onlymodeandsrcmode_pm.csv (5 of 5)


In [12]:
# Attach the original mode to each line-summary row by matching on the line name.
# pd.merge defaults to an inner join, so lines present in only one table are dropped.
# NOTE(review): assumes 'Line' is unique in linemodesum_final; duplicates would multiply rows - confirm.
linesummary = pd.merge(linesum_final, linemodesum_final, on = 'Line')

In [13]:
# (rows, columns) of the merged table
linesummary.shape

(3959, 10)

In [14]:
# preview the merged line summary
linesummary

Unnamed: 0,Line,Modechar,Passengers,Modenum,Agency,GTFS_Routeid,Timeperiod,Direction,Segid,Orig_Mode
0,10_573_198_AM_d0_s493,o,53.1345858573913574,10,573,198,AM,d0,s493,b
1,10_573_199_AM_d0_s492,o,216.9216642379760742,10,573,199,AM,d0,s492,b
2,10_573_200_AM_d0_s498,o,190.7014350891113281,10,573,200,AM,d0,s498,b
3,10_573_201_AM_d0_s490,o,29.6223683059215546,10,573,201,AM,d0,s490,b
4,10_573_202_AM_d0_s494,o,144.5206700563430786,10,573,202,AM,d0,s494,b
...,...,...,...,...,...,...,...,...,...,...
3954,7__289_PM_d0_s718,o,0.0000000000000000,7,,289,PM,d0,s718,b
3955,7__290_PM_d0_s726,o,131.2536240816116333,7,,290,PM,d0,s726,b
3956,99_533_259_PM_d0_s677,o,89.5609741210937500,99,533,259,PM,d0,s677,b
3957,99_533_261_PM_d0_s678,o,28.0000000000000000,99,533,261,PM,d0,s678,b


In [58]:
# Volume was parsed from raw text lines, so it is still a string; convert it to float before summing
segment_final['Volume'] = segment_final['Volume'].astype(float)

In [28]:
# Board and Alight were parsed from raw text lines; turn both into floats.
for count_col in ('Board', 'Alight'):
    segment_final[count_col] = segment_final[count_col].astype(float)

# Split each segment label at "-" into its two station ids.
station_pair = segment_final['Segment'].str.split("-", expand=True)
segment_final[['A_Station', 'B_Station']] = station_pair

In [52]:
# Total boardings and alightings per station id, time period and agency;
# only combinations with a positive total are kept.
station_keys = ['A_Station', 'Timeperiod', 'Agency']

board_totals = segment_final.groupby(station_keys)['Board'].sum().reset_index()
trn_stn_br = board_totals[board_totals['Board'] > 0]

alight_totals = segment_final.groupby(station_keys)['Alight'].sum().reset_index()
trn_stn_al = alight_totals[alight_totals['Alight'] > 0]

In [53]:
# Rename to the reporting column names, then attach the constant descriptor
# columns for submetric A3.2.1 (boardings at stations).
boarding_labels = {
    'Concept_ID': concept_id,
    'Metric_ID': 'A3.2',
    'Metric_name': 'Transit Ridership',
    'Submetric': 'A3.2.1',
    'Description': 'Boardings at stations',
    'Population': 'Whole Population',
    'Geography': 'Regional',
    'Origin_zone': '',
    'Dest_zone': '',
    'Purpose': '',
    'Units': 'Boardings',
    'Total_Increment': '',
}

trn_stn_br = (
    trn_stn_br
    .rename(columns={'A_Station': 'Stations', 'Board': 'Value', 'Timeperiod': 'Period'})
    .assign(**boarding_labels)
)

In [54]:
# preview the labelled station boardings table
trn_stn_br

Unnamed: 0,Stations,Period,Agency,Value,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Geography,Origin_zone,Dest_zone,Purpose,Units,Total_Increment
21,100014,AM,VTA,12.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
22,100014,EA,VTA,4.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
24,100014,MD,VTA,8.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
25,100014,PM,VTA,12.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
43,100070,EV,VTA,8.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404956,99968,AM,VTA,59.3235,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
404959,99968,EV,VTA,4.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
404961,99968,MD,VTA,8.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,
404963,99968,PM,VTA,16.0000,BaseYear2015,A3.2,Transit Ridership,A3.2.1,Boardings at stations,Whole Population,Regional,,,,Boardings,


In [55]:
# Same layout as the boardings table, for submetric A3.2.2 (alightings at stations):
# rename the key/value columns, then add the constant descriptor columns.
alighting_renames = {'A_Station': 'Stations', 'Alight': 'Value', 'Timeperiod': 'Period'}

trn_stn_al = trn_stn_al.rename(columns=alighting_renames).assign(
    Concept_ID=concept_id,
    Metric_ID='A3.2',
    Metric_name='Transit Ridership',
    Submetric='A3.2.2',
    Description='Alightings at stations',
    Population='Whole Population',
    Geography='Regional',
    Origin_zone='',
    Dest_zone='',
    Purpose='',
    Units='Alightings',
    Total_Increment='',
)

In [64]:
# total volume for each (A station, B station) pair, by time period and agency
trn_seg = segment_final.groupby(['A_Station', 'B_Station', 'Timeperiod', 'Agency'])['Volume'].sum().reset_index()

In [65]:
# keep only the rows with a positive total volume
trn_seg = trn_seg.loc[trn_seg['Volume']>0]

In [66]:
# Rename to the reporting column names, then attach the constant descriptor
# columns for submetric A3.2.3 (flows between origin and destination stations).
flow_renames = {
    'A_Station': 'Origin_Station',
    'B_Station': 'Dest_Station',
    'Volume': 'Value',
    'Timeperiod': 'Period',
}
flow_labels = {
    'Concept_ID': concept_id,
    'Metric_ID': 'A3.2',
    'Metric_name': 'Transit Ridership',
    'Submetric': 'A3.2.3',
    'Description': 'Flows between origin and destination stations',
    'Population': 'Whole Population',
    'Geography': 'Regional',
    'Purpose': '',
    'Units': '',
    'Total_Increment': '',
}

trn_seg = trn_seg.rename(columns=flow_renames).assign(**flow_labels)

In [67]:
# preview the labelled station-to-station flow table
trn_seg

Unnamed: 0,Origin_Station,Dest_Station,Period,Agency,Value,Concept_ID,Metric_ID,Metric_name,Submetric,Description,Population,Geography,Purpose,Units,Total_Increment
0,10000,10866,AM,SFMTA,759.555664,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
1,10000,10866,EA,SFMTA,16.000000,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
2,10000,10866,EV,SFMTA,140.866745,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
3,10000,10866,MD,SFMTA,344.408661,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
4,10000,10866,PM,SFMTA,404.740479,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557263,99997,135494,AM,VTA,352.158447,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
557264,99997,135494,EA,VTA,46.666668,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
557265,99997,135494,EV,VTA,33.416935,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
557266,99997,135494,MD,VTA,167.151520,BaseYear2015,A3.2,Transit Ridership,A3.2.3,Flows between origin and destination stations,Whole Population,Regional,,,
