### README: This notebook uses Google Map Direction APIs to collect the level of service for vista trips with following travel modes:
* transit

Note that we can also collect for other modes, but due to quote limitation, we only collect for `transit`. For other modes, we will use HERE APIs

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import json, os, time
import numpy as np
import pandas as pd
import googlemaps
from datetime import datetime, timedelta
from los_goolge_api_utils import *

In [3]:
# Setup google api key
GCP_MAP_KEY = os.environ.get('GCP_ELEVATION_API')
gmap = googlemaps.Client(key=GCP_MAP_KEY)

### Load trips data

In [4]:
trips = pd.read_csv("../data/processed/trip_od_full.csv", low_memory=False)
trips.shape

(36287, 111)

In [5]:
# Use relavent columns
cols = ['TRIPID', 'Mode', 'LINKMODE', 'TRIPPURP', 'DESTPURP1', 'TRIPTIME', 'TRAVTIME',
        'TRAVDOW', 'TRAVDATE', 'TRAVMONTH', 'TRAVYEAR', 'STARTHOUR', 'STARTIME', 
        'HomeSA1', 'CUMDIST','ORIGSA1', 'DESTSA1', 'ORIG_X', 'ORIG_Y', 'DEST_X', 'DEST_Y']
trips = trips[cols]
trips.shape

(36287, 21)

### Get trips departure time

In [6]:
# Get detailed start time of trips
trips['STARTMIN'] = trips['STARTIME'] % 60
trips['START_DETAIL_TIME'] = (trips['TRAVDATE'].astype(str) + " " + trips['STARTHOUR'].astype(str) 
                              + ":" + trips['STARTMIN'].astype(str))

In [7]:
trips.head().T

Unnamed: 0,0,1,2,3,4
TRIPID,Y12H0000107P01T01,Y12H0000107P01T02,Y12H0000107P01T03,Y12H0000107P01T04,Y12H0000107P02T01
Mode,Car,Car,Walk,Walk,Car
LINKMODE,Vehicle Driver,Vehicle Driver,Walking,Walking,Vehicle Passenger
TRIPPURP,Social,Social,Recreational,Recreational,Social
DESTPURP1,Social,At or Go Home,Recreational,At or Go Home,Social
TRIPTIME,20,10,30,30,20
TRAVTIME,20,10,30,30,20
TRAVDOW,Sunday,Sunday,Sunday,Sunday,Sunday
TRAVDATE,27/05/2012,27/05/2012,27/05/2012,27/05/2012,27/05/2012
TRAVMONTH,May,May,May,May,May


### Collect Level of Service from Google APIs

In [8]:
def get_datetime(str_time):
    return datetime.strptime(str_time,'%d/%m/%Y %H:%M')

# Test
print(get_datetime("27/05/2012 23:30"))

2012-05-27 23:30:00


In [9]:
def get_transit_future_departure_time(departure_time):
    """
    This function will get the transit future time for the same date of week of trip departure time,
    but in 2 weeks in futures.
    """
    tripday = get_datetime(departure_time)
    today = datetime.now()
    monday = today - timedelta(days=today.weekday())
    # Get same week day of the trip date
    sameday_thisweek = monday + timedelta(tripday.weekday())
    sameday_thisweek = sameday_thisweek.replace(hour=tripday.hour, minute=tripday.minute, second=tripday.second)
    # Get same day but in 2 weeks in future
    sameday_next2week = sameday_thisweek + timedelta(days=2*7) # Next 2 weeks 
    return sameday_next2week

In [10]:
# Test
dep_time = "06/08/2015 23:30"
get_transit_future_departure_time(dep_time)

datetime.datetime(2021, 9, 2, 23, 30, 0, 361133)

In [11]:
# Test with trips dataframe
trips.head(20).START_DETAIL_TIME.apply(lambda x: get_transit_future_departure_time(x))

0    2021-09-05 10:30:00.113294
1    2021-09-05 12:30:00.113317
2    2021-09-05 13:30:00.113332
3    2021-09-05 14:15:00.113344
4    2021-09-05 10:30:00.113356
5    2021-09-05 12:30:00.113367
6    2021-09-05 13:30:00.113379
7    2021-09-05 14:15:00.113390
8    2021-09-05 10:30:00.113401
9    2021-09-05 12:30:00.113412
10   2021-09-05 13:30:00.113423
11   2021-09-05 14:15:00.113434
12   2021-09-02 05:45:00.113446
13   2021-09-02 14:40:00.113457
14   2021-09-02 15:31:00.113468
15   2021-09-02 16:00:00.113480
16   2021-09-02 16:35:00.113491
17   2021-08-31 09:45:00.113502
18   2021-08-31 13:00:00.113513
19   2021-08-31 15:00:00.113524
Name: START_DETAIL_TIME, dtype: datetime64[ns]

In [12]:
def get_trip_los(trip, mode='transit'):
    time.sleep(0.3)
    start_time = get_transit_future_departure_time(trip['START_DETAIL_TIME'])    
    orig = [trip.ORIG_Y, trip.ORIG_X]
    dest = [trip.DEST_Y, trip.DEST_X]
    (trip['TRANSIT_TIME'], trip['TRANSIT_LEN']) = collect_google_los(gmap, orig, dest, mode, start_time)
    return trip

In [13]:
%%time
# Test
trips.head(20).apply(lambda x: get_trip_los(x), axis=1)

CPU times: user 278 ms, sys: 8.27 ms, total: 287 ms
Wall time: 10.7 s


Unnamed: 0,TRIPID,Mode,LINKMODE,TRIPPURP,DESTPURP1,TRIPTIME,TRAVTIME,TRAVDOW,TRAVDATE,TRAVMONTH,...,ORIGSA1,DESTSA1,ORIG_X,ORIG_Y,DEST_X,DEST_Y,STARTMIN,START_DETAIL_TIME,TRANSIT_TIME,TRANSIT_LEN
0,Y12H0000107P01T01,Car,Vehicle Driver,Social,Social,20,20,Sunday,27/05/2012,May,...,20904122028,20904122002,145.087302,-37.659741,145.071523,-37.651478,30,27/05/2012 10:30,1541,1930
1,Y12H0000107P01T02,Car,Vehicle Driver,Social,At or Go Home,10,10,Sunday,27/05/2012,May,...,20904122002,20904122028,145.071523,-37.651478,145.087302,-37.659741,30,27/05/2012 12:30,1231,2698
2,Y12H0000107P01T03,Walk,Walking,Recreational,Recreational,30,30,Sunday,27/05/2012,May,...,20904122028,20904122031,145.087302,-37.659741,145.062231,-37.66497,30,27/05/2012 13:30,1665,3857
3,Y12H0000107P01T04,Walk,Walking,Recreational,At or Go Home,30,30,Sunday,27/05/2012,May,...,20904122031,20904122028,145.062231,-37.66497,145.087302,-37.659741,15,27/05/2012 14:15,1443,3613
4,Y12H0000107P02T01,Car,Vehicle Passenger,Social,Social,20,20,Sunday,27/05/2012,May,...,20904122028,20904122002,145.087302,-37.659741,145.071523,-37.651478,30,27/05/2012 10:30,1541,1930
5,Y12H0000107P02T02,Car,Vehicle Passenger,Social,At or Go Home,10,10,Sunday,27/05/2012,May,...,20904122002,20904122028,145.071523,-37.651478,145.087302,-37.659741,30,27/05/2012 12:30,1231,2698
6,Y12H0000107P02T03,Cycle,Bicycle,Recreational,Recreational,12,12,Sunday,27/05/2012,May,...,20904122028,20904122031,145.087302,-37.659741,145.062231,-37.66497,30,27/05/2012 13:30,1665,3857
7,Y12H0000107P02T04,Cycle,Bicycle,Recreational,At or Go Home,12,12,Sunday,27/05/2012,May,...,20904122031,20904122028,145.062231,-37.66497,145.087302,-37.659741,15,27/05/2012 14:15,1443,3613
8,Y12H0000107P03T01,Car,Vehicle Passenger,Social,Social,20,20,Sunday,27/05/2012,May,...,20904122028,20904122002,145.087302,-37.659741,145.071523,-37.651478,30,27/05/2012 10:30,1541,1930
9,Y12H0000107P03T02,Car,Vehicle Passenger,Social,At or Go Home,10,10,Sunday,27/05/2012,May,...,20904122002,20904122028,145.071523,-37.651478,145.087302,-37.659741,30,27/05/2012 12:30,1231,2698


In [14]:
trips.Mode.value_counts()

Walk     17115
Car      15764
Cycle     1704
Train     1704
Name: Mode, dtype: int64

In [15]:
# Divide data into walk_trips and not_walk_trips for smaller APIs number of queries
car_trips = trips[trips.Mode.isin(['Car'])]
walk_trips = trips[trips.Mode.isin(['Walk'])]
train_trips = trips[trips.Mode.isin(['Train'])]
cycle_trips = trips[trips.Mode.isin(['Cycle'])]
assert len(trips) == len(car_trips) + len(walk_trips) + len(train_trips) + len(cycle_trips)
walk_trips.shape, car_trips.shape, train_trips.shape, cycle_trips.shape, trips.shape

((17115, 23), (15764, 23), (1704, 23), (1704, 23), (36287, 23))

In [16]:
# Use batch processing as limitation of Google Map APIs service
# ApiError: 429, Too Many Requests, {"error":"Too Many Requests","error_description":"Rate limit for this service has been reached"}

def process_trips_los(trips_df, batch_size, save_path, df_name):
    """
    Process trips and get los from Google Map APIs
    Parameters
        trips_df: trips dataframe
        batch_size: process in batch due to quota limitation of Google Map APIs
        save_path: where to save collected batch files
    """
    batch_idxs = np.arange(len(trips_df)) // batch_size
    for gr, df in trips_df.groupby(batch_idxs):        
        trip_los = df.apply(lambda x: get_trip_los(x), axis=1)
        trip_los.to_csv(f"{save_path}/{df_name}_{gr}.csv")
        print(f"Finished batch {gr+1}", df.shape, trip_los.shape)
        time.sleep(5)
        
        
def join_trips_los(trips_len, batch_size, save_path, df_name):
    batch_idxs = np.arange(trips_len // batch_size + 1)
    trips = []
    for idx in batch_idxs:
        trip_los = pd.read_csv(f"{save_path}/{df_name}_{idx}.csv")
        trips.append(trip_los)
    return pd.concat(trips)

In [17]:
%%time
# RUN ONCE - Get LOS for train_trips
save_path = "../data/los_gmap"
batch_size = 200
process_trips_los(train_trips, batch_size, save_path, "train_trips_los_gmap")

Finished batch 1 (200, 23) (200, 25)
Finished batch 2 (200, 23) (200, 25)
Finished batch 3 (200, 23) (200, 25)
Finished batch 4 (200, 23) (200, 25)
Finished batch 5 (200, 23) (200, 25)
Finished batch 6 (200, 23) (200, 25)
Finished batch 7 (200, 23) (200, 25)
Finished batch 8 (200, 23) (200, 25)
Finished batch 9 (104, 23) (104, 25)
CPU times: user 21.9 s, sys: 338 ms, total: 22.3 s
Wall time: 15min 52s


In [18]:
%%time
# RUN ONCE - Get LOS for car_trips
process_trips_los(car_trips, batch_size, save_path, "car_trips_los_gmap")

Finished batch 1 (200, 23) (200, 25)
Finished batch 2 (200, 23) (200, 25)
Finished batch 3 (200, 23) (200, 25)
Finished batch 4 (200, 23) (200, 25)
Finished batch 5 (200, 23) (200, 25)
Finished batch 6 (200, 23) (200, 25)
Finished batch 7 (200, 23) (200, 25)
Finished batch 8 (200, 23) (200, 25)
Finished batch 9 (200, 23) (200, 25)
Finished batch 10 (200, 23) (200, 25)
Finished batch 11 (200, 23) (200, 25)
Finished batch 12 (200, 23) (200, 25)
Finished batch 13 (200, 23) (200, 25)
Finished batch 14 (200, 23) (200, 25)
Finished batch 15 (200, 23) (200, 25)
Finished batch 16 (200, 23) (200, 25)
Finished batch 17 (200, 23) (200, 25)
Finished batch 18 (200, 23) (200, 25)
Finished batch 19 (200, 23) (200, 25)
Finished batch 20 (200, 23) (200, 25)
Finished batch 21 (200, 23) (200, 25)
Finished batch 22 (200, 23) (200, 25)
Finished batch 23 (200, 23) (200, 25)
Finished batch 24 (200, 23) (200, 25)
Finished batch 25 (200, 23) (200, 25)
Finished batch 26 (200, 23) (200, 25)
Finished batch 27 (20

In [None]:
%%time
# RUN ONCE - Get LOS for cycle_trips
process_trips_los(cycle_trips, batch_size, save_path, "cycle_trips_los_gmap")

In [None]:
%%time
# RUN ONCE - Get LOS for walk_trips
process_trips_los(walk_trips, batch_size, save_path, "walk_trips_los_gmap")

### Join batch files together

In [39]:
train_trips_los = join_trips_los(len(train_trips), batch_size, save_path, "train_trips_los_gmap")
car_trips_los = join_trips_los(len(car_trips), batch_size, save_path, "car_trips_los_gmap")
walk_trips_los = join_trips_los(len(walk_trips), batch_size, save_path, "walk_trips_los_gmap")
cycle_trips_los = join_trips_los(len(cycle_trips), batch_size, save_path, "cycle_trips_los_gmap")
train_trips_los.shape, car_trips_los.shape, walk_trips_los.shape, cycle_trips_los.shape

((1704, 30), (15764, 30), (17115, 30), (1704, 30))

In [69]:
full_trips_los = pd.concat([train_trips_los, car_trips_los, walk_trips_los, cycle_trips_los])
full_trips_los.shape

(36287, 30)

In [70]:
# Remove trips with CAR_TIME == 0
full_trips_los = full_trips_los[(full_trips_los.CAR_TIME != 0) & (full_trips_los.WALK_TIME != 0)]
full_trips_los.drop(columns=["Unnamed: 0"], inplace=True)
full_trips_los.shape

(32904, 29)

In [71]:
full_trips_los.Mode.value_counts()

Car      15448
Walk     14115
Train     1695
Cycle     1646
Name: Mode, dtype: int64

In [72]:
full_trips_los.head()

Unnamed: 0,TRIPID,Mode,LINKMODE,TRIPPURP,DESTPURP1,TRIPTIME,TRAVTIME,TRAVDOW,TRAVDATE,TRAVMONTH,...,DEST_X,DEST_Y,STARTMIN,START_DETAIL_TIME,CAR_TIME,CAR_LEN,CYCLE_TIME,CYCLE_LEN,WALK_TIME,WALK_LEN
0,Y12H0000303P01T01,Train,Train,Work,Work Related,30,30,Wednesday,23/05/2012,May,...,144.997645,-37.831626,10,23/05/2012 7:10,747,9336,2295,9205,8867,8733
1,Y12H0000303P01T02,Train,Train,Work,At or Go Home,40,35,Wednesday,23/05/2012,May,...,145.06152,-37.872402,0,23/05/2012 14:0,686,10328,2330,9192,8869,8782
2,Y12H0000312P01T01,Train,Train,Work,Work Related,55,47,Friday,25/05/2012,May,...,144.963048,-37.817061,0,25/05/2012 8:0,846,13118,3174,12544,12363,12250
3,Y12H0000312P01T04,Train,Train,Work,At or Go Home,65,55,Friday,25/05/2012,May,...,145.06152,-37.872402,15,25/05/2012 18:15,871,13612,87,57,12388,12250
4,Y12H0000326P03T01,Train,Train,Work,Work Related,55,48,Friday,25/05/2012,May,...,144.970239,-37.814414,10,25/05/2012 8:10,984,15354,3724,15021,14415,14272


### Merge with trips_od to get full trip info

In [73]:
trips_od = pd.read_csv("../data/processed/trip_od_full.csv", low_memory=False)
cols = ['Mode', 'LINKMODE', 'TRIPPURP', 'DESTPURP1', 'TRIPTIME', 'TRAVTIME',
        'TRAVDOW', 'TRAVDATE', 'TRAVMONTH', 'TRAVYEAR', 'STARTHOUR', 'STARTIME', 
        'HomeSA1', 'CUMDIST','ORIGSA1', 'DESTSA1', 'ORIG_X', 'ORIG_Y', 'DEST_X', 'DEST_Y']
trips_od = trips_od.drop(columns=cols)

In [74]:
full_trips_los = full_trips_los.merge(trips_od, how='left', on='TRIPID')

In [78]:
# Save trips with LoS for further analysis
full_trips_los.to_csv("../data/processed/trip_gmap_los.csv", index=False)