# Google Directions API

* Save each request's response (JSON) as a pickle, which serves as a cache.
* Each request (one trip / route) can take up to 25 waypoints before it has to be broken up into 2 separate requests.

In [1]:
import dotenv
import geopandas as gpd
import googlemaps
import glob
import os
import pandas as pd
import pickle

import setup_gmaps
import utils

from calitp.storage import get_fs
# Filesystem handle from calitp; used below to put/get pickles in GCS.
fs = get_fs()

# Load variables from the local "_env" file before reading the API key.
dotenv.load_dotenv("_env")

# Raises KeyError if GOOGLE_API_KEY is not set in the environment.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Local directory holding one pickle per request, and its GCS counterpart.
DATA_PATH = "./gmaps_cache/"
GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache/"



In [2]:
# Build the request table once and cache it as parquet; later runs just
# read the cached file instead of rebuilding it. Delete the parquet file
# to force a rebuild.
GMAPS_DF_PATH = "./gmaps_df.parquet"

if not os.path.exists(GMAPS_DF_PATH):
    gmaps_df = setup_gmaps.make_gmaps_df()

    # Create identifier, as we loop through each row to make gmaps request
    # if there are errors, can find those later, without having to rerun requests
    # use this identifier in pickle naming
    gmaps_df["identifier"] = gmaps_df.calitp_itp_id.astype(str).str.cat(
        [gmaps_df.route_id, gmaps_df.trip_id, gmaps_df.shape_id], sep="__")

    gmaps_df.to_parquet(GMAPS_DF_PATH)

# Always load from the parquet file so `df` is the same object type and
# content whether or not the table was just rebuilt.
df = pd.read_parquet(GMAPS_DF_PATH)
df.head()

Unnamed: 0,calitp_itp_id,route_id,trip_id,shape_id,day_name,trip_departure,destination,origin,departure_in_one_year,waypoints,identifier
0,4,10,13277020,shp-10-10,Thursday,2022-01-06 07:08:00,"[37.670239, -122.08718]","[37.721266, -122.160201]",2023-01-05 07:08:00,"[[37.72521, -122.156367], [37.720677, -122.150...",4__10__13277020__shp-10-10
1,4,12,2883020,shp-12-14,Thursday,2022-01-06 22:58:00,"[37.879358, -122.301843]","[37.79442, -122.271546]",2023-01-05 22:58:00,"[[37.796581, -122.275884], [37.805037, -122.27...",4__12__2883020__shp-12-14
2,4,14,3396020,shp-14-14,Thursday,2022-01-06 06:14:00,"[37.775518, -122.225458]","[37.787142, -122.195417]",2023-01-05 06:14:00,"[[37.783773, -122.201934], [37.778549, -122.20...",4__14__3396020__shp-14-14
3,4,18,10001020,shp-18-18,Thursday,2022-01-06 06:54:00,"[37.884226, -122.299531]","[37.79716, -122.265807]",2023-01-05 06:54:00,"[[37.80017, -122.273479], [37.806506, -122.269...",4__18__10001020__shp-18-18
4,4,19,947020,shp-19-16,Thursday,2022-01-06 22:20:00,"[37.77534, -122.225114]","[37.803658, -122.276263]",2023-01-05 22:20:00,"[[37.801197, -122.273313], [37.788309, -122.27...",4__19__947020__shp-19-16


In [3]:
# Google Maps client used for every directions request below.
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

In [4]:
# Check if there are any identifiers already cached
# Drop those from our df
#
# The identifier is the pickle's path relative to DATA_PATH, minus the ".p"
# extension. Do NOT use str.strip() for this: it removes a *set of characters*
# from both ends rather than a prefix/suffix, so identifiers ending in
# characters such as "_shp" were truncated and never matched the cache.
# The recursive glob also picks up pickles in sub-directories, which is where
# an identifier containing "/" ends up on disk.
FILES = glob.glob(f"{DATA_PATH}**/*.p", recursive=True)
filenames = [
    os.path.splitext(os.path.relpath(f, DATA_PATH))[0].replace(os.sep, "/")
    for f in FILES
]

print(f"# rows: {len(df)}")
df = df[~df.identifier.isin(filenames)]
print(f"# rows after local caches included: {len(df)}")

# rows: 1619
# rows after local caches included: 1344


In [5]:
# Pull each request column out of df as a plain Python list (one entry per trip),
# all in the same row order so they can be iterated together.
origin, destination, departures, waypoints, identifiers = (
    df[column].tolist()
    for column in [
        "origin",
        "destination",
        "departure_in_one_year",
        "waypoints",
        "identifier",
    ]
)

In [6]:
# https://stackoverflow.com/questions/25052980/use-pickle-to-save-dictionary-in-python
def save_request_pickle(my_list, name):
    """Cache one directions response as a pickle, locally and in GCS.

    Args:
        my_list: response returned by the directions request; a list whose
            first element is the dict that gets pickled.
        name: identifier used as the pickle file name (without ".p").

    Raises:
        IndexError: if `my_list` is empty, i.e. there is nothing to cache.
    """
    # result comes back as a list, but add [0] and it's a dict
    my_dict = my_list[0]

    local_path = f"{DATA_PATH}{name}.p"

    # Make sure the target directory exists before writing; otherwise the
    # response of an already-made request is lost. This also covers names
    # containing "/", which resolve to a sub-directory of the cache.
    os.makedirs(os.path.dirname(local_path), exist_ok=True)

    # Save as local pickle; the context manager closes (and flushes) the file
    # before it is uploaded.
    with open(local_path, "wb") as f:
        pickle.dump(my_dict, f)

    # pickle can't be written directly to GCS. Put the pickle object there.
    fs.put(local_path, f"{GCS_FILE_PATH}{name}.p")
    print(f"Saved {name}")
    
def open_request_pickle(name):
    """Download a cached response pickle from GCS and load it.

    Args:
        name: identifier used as the pickle file name (without ".p").

    Returns:
        The unpickled object stored under that name.
    """
    local_path = f"{DATA_PATH}{name}.p"

    # Download object from GCS bucket (pickle can't be read directly)
    fs.get(f"{GCS_FILE_PATH}{name}.p", local_path)

    # NOTE: pickle.load can execute arbitrary code; only use this on pickles
    # written by this project.
    with open(local_path, "rb") as f:
        my_dict = pickle.load(f)

    return my_dict

In [7]:
# Request driving directions for every remaining trip and cache each response.
# Identifiers whose request or save failed are collected in `no_results`, so
# they can be inspected / retried without re-running the successful requests.
no_results = []
trips = zip(identifiers, origin, destination, departures, waypoints)

for identifier, o, d, departure, stops in trips:
    try:
        result = gmaps.directions(
            o, d,
            mode='driving',
            departure_time=departure,
            # "via:" waypoints are used here to get duration_in_traffic
            waypoints=[f"via:{lat},{lon}" for lat, lon in stops]
        )

        save_request_pickle(result, identifier)
    # Catch Exception instead of a bare `except:` so that KeyboardInterrupt can
    # still stop a long run, and report the reason instead of hiding it.
    except Exception as err:
        print(f"No result: {identifier} ({type(err).__name__}: {err})")
        no_results.append(identifier)

Saved 16__1__t1F4-b66-sl18__5551_shp
Saved 16__11__t8C5-b44F-sl18__5559_shp
Saved 16__12__t258-b4B3-sl18__5567_shp
Saved 16__2__t861-b12F-sl18__5575_shp
Saved 16__3__t2CB-bCB-sl18__5583_shp
Saved 16__52__t7EE-b1452-sl18__5623_shp
Saved 16__7__t86B-b2BE-sl18__5635_shp
Saved 16__748__t6B3-b12432-sl18__5646_shp
Saved 16__785__t15E-b1EAB-sl18__5650_shp
Saved 16__786__t1C2-b1EB7-sl18__5656_shp
Saved 16__787__t1A4-b1EC0-sl18__5662_shp
Saved 16__98__t5AC-b2649-sl18__5705_shp
Saved 48__341ad816-5cb1-49c9-813b-170ee71d1c8e__d4fc8054-1ae1-4914-ac91-a16366672799__7174038f-3569-41ef-999d-a7b9d5bb154a
Saved 48__d73c41c9-d17f-4a43-844c-efb35237e8bd__03656e1d-89d6-469d-ba3a-bdf7bbb704fa__8875da1b-3698-47c6-9fe9-39576c045b3c
No result: 106__Shellmound/Powell__12_Trip10_SP_COVID__p_899325
Saved 116__3166__479210__17529
Saved 116__3168__479449__17535
Saved 116__3169__479702__17540
Saved 118__11024__t_1365998_b_29676_tn_0__p_282428
Saved 118__11025__t_282981_b_30955_tn_0__p_282416
Saved 118__11027__t_136

In [None]:
# Disabled scratch code: everything below sits inside a string literal, so no
# request is made when this cell runs.
# NOTE(review): as written it passes `origin`, `destination`, `departures` and
# `waypoints` as-is; in this notebook those names hold the per-trip lists built
# from df, not a single trip -- confirm before re-enabling.
'''
# Use via waypoints to get duration_in_traffic
# longest travel time compared to sum(waypoints)
via = gmaps.directions(origin, destination, 
                 mode='driving', 
                 departure_time=departures, 
                 waypoints=[f"via:{lat},{lon}" for lat, lon in waypoints]
                )
'''