# Compare car `duration_in_traffic` with bus `service_hours`

* Move Google Directions API requests to `make_gmaps_requests`
* Try to cache results and only revisit the trips that were skipped (no request was made), to see if there's something else to fix in how waypoints were constructed

In [None]:
import glob
import os
import pandas as pd

import utils

# Local folder holding the cached request JSON files (one `*.json` per identifier)
DATA_PATH = "./gmaps_cache/"
# Matching `gmaps_cache/` folder under the project's GCS path from `utils`
GCS_FILE_PATH = f"{utils.GCS_FILE_PATH}gmaps_cache/"

In [None]:
# Trip-level table; later cells rely on its `identifier` and `service_hours` columns
df = pd.read_parquet(f"{utils.GCS_FILE_PATH}gmaps_df.parquet")

In [None]:
# Walk every trip identifier and pull the car travel time (seconds, in
# traffic) out of its cached Directions response. Trips whose response cannot
# be opened or parsed are reported and left out of both lists, so
# `successful_ids` and `durations` always stay aligned.
result_ids = list(df.identifier)

successful_ids = []
durations = []

for i in result_ids:
    try:
        json_dict = utils.open_request_json(
            i, DATA_PATH=DATA_PATH, GCS_FILE_PATH=GCS_FILE_PATH
        )
        duration_in_sec = json_dict["legs"][0]["duration_in_traffic"]["value"]
    except Exception as e:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # (KeyboardInterrupt) stops the loop instead of being swallowed.
        # The reason is printed to tell a missing file from a malformed response.
        print(f"Not found: {i} ({type(e).__name__}: {e})")
    else:
        durations.append(duration_in_sec)
        successful_ids.append(i)

In [None]:
# Sanity check: each successfully parsed identifier should have one duration
print(f"# results_ids: {len(successful_ids)}")
print(f"# durations: {len(durations)}")

# Only build the lookup table when the two lists line up
if len(durations) == len(successful_ids):
    results_df = pd.DataFrame(
        dict(
            identifier=successful_ids,
            duration_in_sec=durations,
        )
    )
    

In [None]:
# Left-join the parsed durations onto the trip table so that trips without a
# parsed response are kept (their `duration_in_sec` is missing).
# `validate` makes pandas raise if `identifier` is duplicated on either side.
final = df.merge(
    results_df,
    how="left",
    on="identifier",
    validate="1:1",
)

def compare_travel_time_by_mode(df):
    """Flag trips where the bus is time-competitive with driving.

    Adds two columns to a copy of `df`:

    * `car_duration_hrs`: `duration_in_sec` converted from seconds to hours.
    * `competitive`: 1 if `service_hours` is at most twice
      `car_duration_hrs`, otherwise 0.

    Rows where either value is missing get `competitive = 0`, because a
    comparison against NaN evaluates to False.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `duration_in_sec` and `service_hours` columns.

    Returns
    -------
    pandas.DataFrame
        New dataframe with the two columns added; the input is not modified.
    """
    car_duration_hrs = df.duration_in_sec.divide(60 * 60)

    # Vectorized comparison instead of a row-wise `apply`: same 0/1 values,
    # and it does not depend on how `apply` behaves for an empty dataframe.
    is_competitive = df.service_hours <= (car_duration_hrs * 2)

    return df.assign(
        car_duration_hrs=car_duration_hrs,
        competitive=is_competitive.astype("int64"),
    )

# Add the car-vs-bus comparison columns to the merged table
final2 = compare_travel_time_by_mode(final)

# Saved locally; later cells read this file back in
final2.to_parquet("./results.parquet")

In [None]:
# Collect the identifiers that already have a cached response on disk.
# NOTE: these are only listed here; nothing is dropped from df yet.
# An identifier here is the file name of a JSON in DATA_PATH, without the
# folder or the extension.
FILES = glob.glob(f"{DATA_PATH}*.json")

# `str.strip` removes a *set of characters* from both ends, not a prefix or
# suffix, so it can also eat leading/trailing characters of the identifier
# itself (any of "./gmaps_cache" or ".json"). Split the path properly instead.
filenames = [os.path.splitext(os.path.basename(f))[0] for f in FILES]

# Summary: total trips, and how many did / did not get a car duration
print(f"# rows: {len(df)}")
print(f"# rows with results: {final.duration_in_sec.notna().sum()}")
print(f"# rows with no results: {final.duration_in_sec.isna().sum()}")

## Competitive Routes

In [None]:
# Reload the saved comparison and count trips per flag
# (1 = bus service_hours within 2x the car travel time, 0 = otherwise)
results = pd.read_parquet("./results.parquet")
results.competitive.value_counts()

## Trips with no results from Google

In [None]:
import dotenv
import googlemaps
import os

# Load environment variables from the local `_env` file
dotenv.load_dotenv("_env")

# Raises KeyError if the key is not set in the environment
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]

# Client used for the Directions requests in the cells that follow
gmaps = googlemaps.Client(key=GOOGLE_API_KEY)

In [None]:
# Keep only the trips that still have no car duration
no_results = pd.read_parquet("./results.parquet").loc[
    lambda trips: trips.duration_in_sec.isna()
]

In [None]:
# Request driving directions for each origin/destination pair and cache the
# response under the trip's identifier.
# NOTE(review): `origin`, `destination`, `departures`, `waypoints` and
# `identifiers` are not defined in the visible cells - presumably built from
# `no_results`; confirm before running.
for i, (o, d) in enumerate(zip(origin, destination)):
    try:
        result = gmaps.directions(
            o,
            d,
            mode='driving',
            departure_time=departures[i],
            # Each waypoint is sent as "via:lat,lon"; see the Directions API
            # docs for how `via:` waypoints are treated.
            waypoints=[f"via:{lat},{lon}" for lat, lon in waypoints[i]],
        )

        utils.save_request_json(
            result,
            identifiers[i],
            # Using different paths than utils.DATA_PATH, utils.GCS_FILE_PATH
            DATA_PATH=DATA_PATH,
            GCS_FILE_PATH=GCS_FILE_PATH,
        )
    except Exception as e:
        # `Exception` rather than a bare `except`, so interrupting the kernel
        # (KeyboardInterrupt) stops the loop instead of being swallowed.
        # The reason is printed so API errors can be told apart from bad inputs.
        print(f"No result: {identifiers[i]} ({type(e).__name__}: {e})")