#### Set the Constants

In [15]:
out_file_name = 'Trip Summaries2.xlsx'
# out_dir = r'C:\Users\msaeidirazavi\OneDrive - Cambridge Systematics\MyFiles\210071 Link21\Trip Summary Scripts\Outputs'

quick = 0   # 0 or 1. Choose zero for full run,
            # else results will be just for a sample from the original input dataframe.

sample_size = 10001   # Choose a number greater than 10,000 or else the script may throw an error. 
c1, c2 = 'Measure_Description', 'Value'  # Column names in the output excel file.

#### Import Python Libraries

In [16]:
import os
import pandas as pd
import yaml
from collections import Counter

#### Set Pandas Options

In [17]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

#### Determine Input and Output Paths

In [18]:
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)
    
_join = os.path.join

ctramp_dir = params['ctramp_dir']
in_trip_roster_path = _join(ctramp_dir, "_pre_process_files\\trip_roster.parquet")
in_link_vols = params['highway_output_data']   

summary_outputs = params['summary_dir']
out_path = _join(summary_outputs, out_file_name)

#### Create the Summary

In [19]:
df_out = pd.DataFrame({c1:[], c2:[]})
df_out

Unnamed: 0,Measure_Description,Value


In [20]:
in_link_vols

'E:\\TM2\\calibration_3332\\2015_calibration_FINALFINAL\\Shapefiles_with_assignment_results_2050_Baseline_R2_Run4\\traffic\\All links with model volume and speed_v07032023_CS.csv'

In [21]:
df_original = pd.read_parquet(in_trip_roster_path)
df_links = pd.read_csv(in_link_vols)

In [22]:
df_sample = df_original.sample(n=sample_size)
df_sample.head()

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,trip_type,trips,transbay_od,orig_rdm_zones,orig_super_dist,orig_county,dest_rdm_zones,dest_super_dist,dest_county,home_zone,income,Income,pp_share,link21_trip_purp,Period,Mode
7985938,1026046,2823925.0,1,shopping,social,3274,3274,15,4,0.3,INM,3.333333,0.0,Marin_01,33,9,Marin_01,33,9,3274,63750,60k_to_100k,0.0,social,pm,Walk
2797456,2761429,7504761.0,1,escort,Home,3166,3166,7,3,0.3,INM,3.333333,0.0,Sonoma_01,30,8,Sonoma_01,30,8,3166,26285,<30k,100.0,escort,am,Auto_3+Person
7263280,2464373,6706738.0,1,school,othdiscr,2910,2935,14,2,0.3,INM,3.333333,0.0,Solano_04,25,6,Solano_01,25,6,2910,62426,60k_to_100k,100.0,othdiscr,md,Auto_2Person
3717924,1605153,4234207.0,0,shopping,othmaint,645,645,11,4,0.3,INM,3.333333,0.0,San Mateo_09,5,2,San Mateo_09,5,2,696,116602,100k+,100.0,othmaint,md,Walk
3812985,1671018,4413924.0,0,Home,escort,757,794,14,2,0.3,INM,3.333333,0.0,San Mateo_19,6,2,San Mateo_20,6,2,757,184108,100k+,0.0,escort,md,Auto_2Person


In [23]:
df = df_sample if quick else df_original 

In [24]:
total_trips = df['trips'].sum()
print(f'Total number of trips: {total_trips:,}')
new = pd.DataFrame({c1:['Trips_Total'], c2:[total_trips]})
df_out = pd.concat([df_out, new])
df_out

Total number of trips: 28,204,913.33333367


Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0


In [25]:
modes = list(df['Mode'].unique()) 
modes = sorted(modes)
modes

['Auto_2Person',
 'Auto_3+Person',
 'Auto_SOV',
 'Bike',
 'KNR_TRANSIT',
 'PNR_TRANSIT',
 'RIDE_HAIL',
 'WALK_TRANSIT',
 'Walk']

In [26]:
modes = [
    'Walk',
    'Bike',
    'WALK_TRANSIT',
    'KNR_TRANSIT',
    'PNR_TRANSIT',    
    'Auto_3+Person',
    'Auto_2Person',
    'Auto_SOV',
    'RIDE_HAIL'
]

In [29]:
modes_auto = ['Auto_SOV', 'Auto_2Person', 'Auto_3+Person']
modes_transit = ['WALK_TRANSIT', 'PNR_TRANSIT', 'KNR_TRANSIT']

In [30]:
#counts = Counter(df["Mode"])
counts = df.groupby(["Mode"])['trips'].sum().reset_index()
dic_counts = dict(zip(counts['Mode'], counts['trips']))
dic_counts

{'Auto_2Person': 6562446.666666667,
 'Auto_3+Person': 4491496.666666667,
 'Auto_SOV': 11519660.0,
 'Bike': 528396.6666666667,
 'KNR_TRANSIT': 137030.0,
 'PNR_TRANSIT': 167213.33333333334,
 'RIDE_HAIL': 173300.0,
 'WALK_TRANSIT': 1112860.0,
 'Walk': 3512510.0}

In [31]:
transit_trips = 0
for m in modes_transit:
    transit_trips += dic_counts[m]
print(f'Total number of transit trips: {transit_trips:,}')
new = pd.DataFrame({c1:['Trips_Transit'], c2:[transit_trips]})
df_out = pd.concat([df_out, new])
df_out

Total number of transit trips: 1,417,103.3333333333


Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0
0,Trips_Transit,1417103.0


In [32]:
tb_trips = df.loc[(df['transbay_od']==1)].copy()
tb_trips = tb_trips['trips'].sum()
print(f'Total number of transbay trips: {tb_trips:,}')

tb_transit_trips = df.loc[(df['transbay_od']==1)&(df['Mode'].str.contains('TRANSIT'))].copy()
tb_transit_trips = tb_transit_trips['trips'].sum()
print(f'Transbay transit trips: {tb_transit_trips:,}')

tb_auto_trips = df.loc[(df['transbay_od']==1)&(df['Mode'].str.contains('Auto'))].copy()                                                                                              
tb_auto_trips = tb_auto_trips['trips'].sum()
print(f'Transbay auto trips: {tb_auto_trips:,}')

new = pd.DataFrame({c1:['Trips_Transbay_Total', 'Trips_Transbay_Transit', 'Trips_Transbay_Auto'],
                    c2:[tb_trips, tb_transit_trips, tb_auto_trips]})
df_out = pd.concat([df_out, new])
df_out

Total number of transbay trips: 686,626.6666666667
Transbay transit trips: 254,016.66666666674
Transbay auto trips: 431,416.66666666686


Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0
0,Trips_Transit,1417103.0
0,Trips_Transbay_Total,686626.7
1,Trips_Transbay_Transit,254016.7
2,Trips_Transbay_Auto,431416.7


In [33]:
df_shares = pd.DataFrame(list(dic_counts.items()), columns=['Mode', 'TripCount'])
df_shares

Unnamed: 0,Mode,TripCount
0,Auto_2Person,6562447.0
1,Auto_3+Person,4491497.0
2,Auto_SOV,11519660.0
3,Bike,528396.7
4,KNR_TRANSIT,137030.0
5,PNR_TRANSIT,167213.3
6,RIDE_HAIL,173300.0
7,WALK_TRANSIT,1112860.0
8,Walk,3512510.0


In [34]:
df_shares['ModeShare'] = df_shares['TripCount']/total_trips
df_shares

Unnamed: 0,Mode,TripCount,ModeShare
0,Auto_2Person,6562447.0,0.23267
1,Auto_3+Person,4491497.0,0.159245
2,Auto_SOV,11519660.0,0.408427
3,Bike,528396.7,0.018734
4,KNR_TRANSIT,137030.0,0.004858
5,PNR_TRANSIT,167213.3,0.005929
6,RIDE_HAIL,173300.0,0.006144
7,WALK_TRANSIT,1112860.0,0.039456
8,Walk,3512510.0,0.124535


In [35]:
for m in modes:
    for i, row in df_shares.iterrows():
        if row['Mode'] == m:
            desc = f'ModePercentShare_{m}'
            val = round(100 * row['ModeShare'], 5)
            new = pd.DataFrame({c1:[desc], c2:[val]})
            df_out = pd.concat([df_out, new])
            break
df_out

Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0
0,Trips_Transit,1417103.0
0,Trips_Transbay_Total,686626.7
1,Trips_Transbay_Transit,254016.7
2,Trips_Transbay_Auto,431416.7
0,ModePercentShare_Walk,12.45354
0,ModePercentShare_Bike,1.87342
0,ModePercentShare_WALK_TRANSIT,3.94562
0,ModePercentShare_KNR_TRANSIT,0.48584
0,ModePercentShare_PNR_TRANSIT,0.59285


In [36]:
tods = params['periods']

dic_tod = {}
for tod in tods:
    df_temp = df.loc[df['Period']==tod]
    val = df_temp['trips'].sum()
    dic_tod[tod] = val
    desc = f'TOD_{tod}_Trips'
    new = pd.DataFrame({c1:[desc], c2:[val]})
    df_out = pd.concat([df_out, new])

for tod in tods:
    val = round(dic_tod[tod]/total_trips*100, 5)
    desc = f'TOD_{tod}_PercentShare'
    new = pd.DataFrame({c1:[desc], c2:[val]})
    df_out = pd.concat([df_out, new])
df_out

Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0
0,Trips_Transit,1417103.0
0,Trips_Transbay_Total,686626.7
1,Trips_Transbay_Transit,254016.7
2,Trips_Transbay_Auto,431416.7
0,ModePercentShare_Walk,12.45354
0,ModePercentShare_Bike,1.87342
0,ModePercentShare_WALK_TRANSIT,3.94562
0,ModePercentShare_KNR_TRANSIT,0.48584
0,ModePercentShare_PNR_TRANSIT,0.59285


In [37]:
total_vmt = 0
for tod in tods:
    df_links[f'{tod}_vmt'] = (df_links[f'autovol{tod.upper()}'] + df_links[f'truck volume{tod.upper()}'] +\
                              df_links[f'large truck linke volume{tod.upper()}']) * df_links['distance']
    val = df_links[f'{tod}_vmt'].sum()
    total_vmt += val
    desc = f'TOD_{tod}_VMT'
    new = pd.DataFrame({c1:[desc], c2:[val]})
    df_out = pd.concat([df_out, new])

new = pd.DataFrame({c1:['Total_VMT'], c2:[total_vmt]})
df_out = pd.concat([df_out, new])
df_out

# for per in periods:
#     df_temp.append({'Period': per,
#                     'Value': df[per+'_vmt'].sum()})
    
    
# df_links['am_vmt'] = (df_links['autovolAM'] + df_links['truck volumeAM'] + df_links['large truck linke volumeAM'])* df_links['distance']
# df_links['pm_vmt'] = (df_links['autovolPM'] + df_links['truck volumePM'] + df_links['large truck linke volumePM'])* df_links['distance']
# df_links['ea_vmt'] = (df_links['autovolEA'] + df_links['truck volumeEA'] + df_links['large truck linke volumeEA'])* df_links['distance']
# df_links['md_vmt'] = (df_links['autovolMD'] + df_links['truck volumeMD'] + df_links['large truck linke volumeMD'])* df_links['distance']
# df_links['ev_vmt'] = (df_links['autovolEV'] + df_links['truck volumeEV'] + df_links['large truck linke volumeEV'])* df_links['distance']

Unnamed: 0,Measure_Description,Value
0,Trips_Total,28204910.0
0,Trips_Transit,1417103.0
0,Trips_Transbay_Total,686626.7
1,Trips_Transbay_Transit,254016.7
2,Trips_Transbay_Auto,431416.7
0,ModePercentShare_Walk,12.45354
0,ModePercentShare_Bike,1.87342
0,ModePercentShare_WALK_TRANSIT,3.94562
0,ModePercentShare_KNR_TRANSIT,0.48584
0,ModePercentShare_PNR_TRANSIT,0.59285


#### Write Out the Summary File

In [38]:
df_out.to_excel(out_path, sheet_name='Trip Summaries', index=False)