#### Set the Constants

In [1]:
# Name of the Excel workbook produced at the end of the notebook.
out_file_name = 'Trip Summaries.xlsx'

# Run-mode switch: 0 summarises the full input dataframe, 1 summarises only a
# random sample drawn from it.
quick = 0

# Number of rows drawn for the sample. The original author advises a value
# greater than 10,000, otherwise the script may throw an error.
sample_size = 10001

# Column names in the output excel file.
c1 = 'Measure_Description'
c2 = 'Value'

#### Import Python Libraries

In [2]:
import os
import pandas as pd
import yaml
from collections import Counter

#### Set Pandas Options

In [3]:
# Lift pandas' display truncation so frames are shown with every column and row.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

#### Determine Input and Output Paths

In [4]:
# Load the run configuration; the keys read below must exist in config.yaml.
with open('config.yaml', 'r') as file:
    params = yaml.safe_load(file)

_join = os.path.join

# Inputs: the trip roster parquet below `ctramp_dir`, and the link-volume
# table named by the `highway_output_data` setting.
ctramp_dir = params['ctramp_dir']
# Each path component is passed separately so os.path.join inserts the
# separator of the current platform instead of a hard-coded Windows backslash.
in_trip_roster_path = _join(ctramp_dir, "_pre_process_files", "trip_roster.parquet")
in_link_vols = params['highway_output_data']

# Output: the summary workbook goes into the configured summary directory.
summary_outputs = params['summary_dir']
out_path = _join(summary_outputs, out_file_name)

#### Create the Summary

In [5]:
# Start with an empty two-column summary table; later cells append their
# measures to it.
df_out = pd.DataFrame({column: [] for column in (c1, c2)})
df_out

Unnamed: 0,Measure_Description,Value


In [6]:
# Trip roster (parquet) that all trip-based measures are computed from.
df_original = pd.read_parquet(path=in_trip_roster_path)
# Link-level table (CSV) used later for the volume-times-distance totals.
df_links = pd.read_csv(filepath_or_buffer=in_link_vols)

In [7]:
# Draw the sample that is used when `quick` is set.
# - The draw is capped at the number of available rows, because
#   DataFrame.sample raises ValueError when n exceeds the frame length
#   (sampling without replacement).
# - A fixed random_state makes quick runs reproducible across kernel restarts.
df_sample = df_original.sample(n=min(sample_size, len(df_original)), random_state=0)
df_sample.head()

Unnamed: 0,hh_id,person_id,inbound,orig_purpose,dest_purpose,orig_taz,dest_taz,depart_hour,trip_mode,sampleRate,trip_type,trips,transbay_od,orig_rdm_zones,orig_super_dist,orig_county,dest_rdm_zones,dest_super_dist,dest_county,home_zone,income,Income,pp_share,link21_trip_purp,Period,Mode
18671439,2838217,6728157.0,0,Home,escort,1016,1160,8,2,1.0,INM,1.0,0.0,Santa Clara_31,9,3,Santa Clara_23,10,3,1016,84681,60k_to_100k,0.0,escort,am,Auto_2Person
23961302,3244357,7790751.0,0,Home,shopping,1482,1498,9,2,1.0,INM,1.0,0.0,Santa Clara_48,14,3,Santa Clara_49,14,3,1482,64642,60k_to_100k,100.0,shopping,am,Auto_2Person
24553885,808296,1953555.0,0,Home,work,1559,999,6,1,1.0,INM,1.0,0.0,Alameda_53,15,4,Santa Clara_38,8,3,1559,82529,60k_to_100k,0.0,work,am,Auto_SOV
12244178,4164165,10200665.0,1,othdiscr,Home,3216,3153,11,4,1.0,INM,1.0,0.0,Sonoma_01,30,8,Sonoma_01,30,8,3153,106755,100k+,0.0,othdiscr,md,Walk
6903387,3532853,8498628.0,1,work,Home,1015,1521,17,6,1.0,INM,1.0,0.0,Santa Clara_31,9,3,Santa Clara_06,11,3,1521,6375,<30k,0.0,work,pm,WALK_TRANSIT


In [8]:
# Pick the working frame: the sample for a quick run, the full roster otherwise.
if quick:
    df = df_sample
else:
    df = df_original

In [9]:
# The trip total is taken as the number of rows in the working frame.
total_trips = df.shape[0]
print(f'Total number of trips: {total_trips:,}')

# Append the measure as a one-row frame to the running summary.
new = pd.DataFrame([['Trips_Total', total_trips]], columns=[c1, c2])
df_out = pd.concat([df_out, new])
df_out

Total number of trips: 35,996,800


Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0


In [10]:
# Alphabetically sorted list of the distinct mode labels found in the data.
modes = sorted(df['Mode'].unique())
modes

['Auto_2Person',
 'Auto_3+Person',
 'Auto_SOV',
 'Bike',
 'KNR_TRANSIT',
 'PNR_TRANSIT',
 'RIDE_HAIL',
 'WALK_TRANSIT',
 'Walk']

In [11]:
# Reporting order of the modes in the summary, written out group by group:
# non-motorised, transit, auto, then ride-hail.
modes = (
    ['Walk', 'Bike']
    + ['WALK_TRANSIT', 'KNR_TRANSIT', 'PNR_TRANSIT']
    + ['Auto_3+Person', 'Auto_2Person', 'Auto_SOV']
    + ['RIDE_HAIL']
)

In [12]:
# Mode labels that make up the auto group and the transit group.
modes_auto = [f'Auto_{occupancy}' for occupancy in ('SOV', '2Person', '3+Person')]
modes_transit = [f'{access}_TRANSIT' for access in ('WALK', 'PNR', 'KNR')]

In [13]:
# Tally the number of rows per mode label, then keep a plain-dict copy of the
# tally for building the shares table.
counts = Counter()
counts.update(df["Mode"])
dic_counts = {mode: n_trips for mode, n_trips in counts.items()}
dic_counts

{'Auto_SOV': 13821942,
 'WALK_TRANSIT': 2114913,
 'Auto_2Person': 7497134,
 'Walk': 5071397,
 'Auto_3+Person': 4419521,
 'RIDE_HAIL': 285822,
 'Bike': 2104627,
 'PNR_TRANSIT': 393511,
 'KNR_TRANSIT': 287933}

In [14]:
# Sum the per-mode tallies over the transit modes (a Counter yields 0 for a
# mode that never occurred).
transit_trips = sum(counts[mode] for mode in modes_transit)
print(f'Total number of transit trips: {transit_trips:,}')

# Append the measure as a one-row frame to the running summary.
new = pd.DataFrame([['Trips_Transit', transit_trips]], columns=[c1, c2])
df_out = pd.concat([df_out, new])
df_out

Total number of transit trips: 2,796,357


Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0
0,Trips_Transit,2796357.0


In [15]:
# Only row counts are needed, so boolean masks are summed instead of
# materialising (and copying) three filtered frames. The transbay mask is
# computed once and reused.
is_transbay = df['transbay_od'] == 1

tb_trips = int(is_transbay.sum())
print(f'Total number of transbay trips: {tb_trips:,}')

# Transit trips: mode label contains the substring 'TRANSIT'.
tb_transit_trips = int((is_transbay & df['Mode'].str.contains('TRANSIT')).sum())
print(f'Transbay transit trips: {tb_transit_trips:,}')

# Auto trips: mode label contains the substring 'Auto'.
tb_auto_trips = int((is_transbay & df['Mode'].str.contains('Auto')).sum())
print(f'Transbay auto trips: {tb_auto_trips:,}')

# Append the three transbay measures to the running summary.
new = pd.DataFrame({c1:['Trips_Transbay_Total', 'Trips_Transbay_Transit', 'Trips_Transbay_Auto'],
                    c2:[tb_trips, tb_transit_trips, tb_auto_trips]})
df_out = pd.concat([df_out, new])
df_out

Total number of transbay trips: 936,697
Transbay transit trips: 493,814
Transbay auto trips: 438,824


Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0
0,Trips_Transit,2796357.0
0,Trips_Transbay_Total,936697.0
1,Trips_Transbay_Transit,493814.0
2,Trips_Transbay_Auto,438824.0


In [16]:
# Turn the per-mode tally into a two-column table, one row per mode.
df_shares = pd.DataFrame(
    data=[(mode, n_trips) for mode, n_trips in dic_counts.items()],
    columns=['Mode', 'TripCount'],
)
df_shares.head(2)

Unnamed: 0,Mode,TripCount
0,Auto_SOV,13821942
1,WALK_TRANSIT,2114913


In [17]:
# Share of each mode as a fraction of the total number of trips.
df_shares['ModeShare'] = df_shares['TripCount'].div(total_trips)
df_shares.head(3)

Unnamed: 0,Mode,TripCount,ModeShare
0,Auto_SOV,13821942,0.383977
1,WALK_TRANSIT,2114913,0.058753
2,Auto_2Person,7497134,0.208272


In [18]:
# Look up each mode's share through a dict instead of scanning df_shares with
# iterrows() once per mode, and append all rows with a single concat rather
# than one concat per row.
share_by_mode = dict(zip(df_shares['Mode'], df_shares['ModeShare']))

# Rows follow the reporting order in `modes`; a mode that is absent from
# df_shares is skipped.
share_rows = [
    (f'ModePercentShare_{m}', round(100 * share_by_mode[m], 5))
    for m in modes
    if m in share_by_mode
]

if share_rows:
    new = pd.DataFrame(share_rows, columns=[c1, c2])
    df_out = pd.concat([df_out, new])
df_out

Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0
0,Trips_Transit,2796357.0
0,Trips_Transbay_Total,936697.0
1,Trips_Transbay_Transit,493814.0
2,Trips_Transbay_Auto,438824.0
0,ModePercentShare_Walk,14.08847
0,ModePercentShare_Bike,5.84671
0,ModePercentShare_WALK_TRANSIT,5.87528
0,ModePercentShare_KNR_TRANSIT,0.79988
0,ModePercentShare_PNR_TRANSIT,1.09318


In [19]:
tods = params['periods']

# Count the trips of every period in one pass over the 'Period' column instead
# of filtering the whole frame once per period. A configured period with no
# trips gets a count of 0.
period_counts = df['Period'].value_counts()
dic_tod = {tod: int(period_counts.get(tod, 0)) for tod in tods}

# All trip-count rows come first, followed by all percent-share rows; they are
# appended with a single concat.
tod_rows = [(f'TOD_{tod}_Trips', dic_tod[tod]) for tod in tods]
tod_rows += [
    (f'TOD_{tod}_PercentShare', round(dic_tod[tod] / total_trips * 100, 5))
    for tod in tods
]

if tod_rows:
    new = pd.DataFrame(tod_rows, columns=[c1, c2])
    df_out = pd.concat([df_out, new])
df_out

Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0
0,Trips_Transit,2796357.0
0,Trips_Transbay_Total,936697.0
1,Trips_Transbay_Transit,493814.0
2,Trips_Transbay_Auto,438824.0
0,ModePercentShare_Walk,14.08847
0,ModePercentShare_Bike,5.84671
0,ModePercentShare_WALK_TRANSIT,5.87528
0,ModePercentShare_KNR_TRANSIT,0.79988
0,ModePercentShare_PNR_TRANSIT,1.09318


In [20]:
# For each period, compute (auto + truck + large-truck volume) * distance per
# link, store it on df_links as '<period>_vmt', and report the column sum.
# All rows, including the grand total, are appended with a single concat.
total_vmt = 0
vmt_rows = []
for tod in tods:
    period = tod.upper()
    # NOTE(review): the spelling 'linke' presumably mirrors the column header
    # in the link-volume CSV; confirm against the file before changing it.
    link_volume = (
        df_links[f'autovol{period}']
        + df_links[f'truck volume{period}']
        + df_links[f'large truck linke volume{period}']
    )
    df_links[f'{tod}_vmt'] = link_volume * df_links['distance']
    val = df_links[f'{tod}_vmt'].sum()
    total_vmt += val
    vmt_rows.append((f'TOD_{tod}_VMT', val))

vmt_rows.append(('Total_VMT', total_vmt))
new = pd.DataFrame(vmt_rows, columns=[c1, c2])
df_out = pd.concat([df_out, new])
df_out

Unnamed: 0,Measure_Description,Value
0,Trips_Total,35996800.0
0,Trips_Transit,2796357.0
0,Trips_Transbay_Total,936697.0
1,Trips_Transbay_Transit,493814.0
2,Trips_Transbay_Auto,438824.0
0,ModePercentShare_Walk,14.08847
0,ModePercentShare_Bike,5.84671
0,ModePercentShare_WALK_TRANSIT,5.87528
0,ModePercentShare_KNR_TRANSIT,0.79988
0,ModePercentShare_PNR_TRANSIT,1.09318


#### Write Out the Summary File

In [22]:
# Write the summary table to a single worksheet, leaving out the frame's index.
df_out.to_excel(excel_writer=out_path, index=False, sheet_name='Trip Summaries')