In [59]:
import googlemaps
import os
import dotenv
from datetime import datetime
import pandas as pd
import numpy as np
import time

# Load a local .env file into the process environment. Later cells read
# GOOGLE_API_KEY, MOHAMMAD_SHARED_PATH and OUTPUT_PATH from the environment
# (presumably supplied by that .env file — confirm).
dotenv.load_dotenv()

True

#### Some Initial setup details

- **Cost warning (pricing as of Feb 22, 2024):** the Distance Matrix API is billed per element (one origin–destination pair) at \$ $\frac{5}{1000}$ each.
  - Example: 118 origins x 118 destinations = 13,924 elements = 69.62 USD

In [216]:
run = False # Prevent accidental runs (the API loop further down is paid per origin/destination pair)
api_key = os.getenv("GOOGLE_API_KEY")

mode = 'bicycling' # or "transit"
time_compute = datetime(2024, 2, 23, 7, 30, 0) # the time of day to compute the travel time


def format_time_tag(moment):
    """Return the ``T<hour><minute>`` tag used in column and file names.

    Minutes are zero-padded to two digits so that distinct times never
    collide: without padding, 01:15 and 11:05 would both become ``T115``.
    The hour is left unpadded, so 07:30 is still ``T730``.

    Parameters
    ----------
    moment : datetime.datetime
        The departure time to encode.

    Returns
    -------
    str
        For example ``"T730"`` for 07:30 or ``"T1105"`` for 11:05.
    """
    return f"T{moment.hour}{moment.minute:02d}"


time_tag = format_time_tag(time_compute)
column_name_for_time_at_this_time = f"{time_tag}_{mode}_(In Minutes)"
output_file_name = f"{time_tag}_{mode}.parquet"

print(f"column_name == {column_name_for_time_at_this_time}, output_file_name == {output_file_name}")

column_name == T730_bicycling_(In Minutes), output_file_name == T730_bicycling.parquet


In [3]:
# Initialize the Google Maps client with the key read from GOOGLE_API_KEY.
# NOTE(review): api_key is None when that environment variable is unset —
# confirm the .env file has been loaded before running this cell.
gmaps = googlemaps.Client(key=api_key)

#### Get Kiosk Data

In [212]:
# Kiosk list lives under the shared folder named by MOHAMMAD_SHARED_PATH.
kiosk_csv_path = os.path.join(
    os.environ['MOHAMMAD_SHARED_PATH'],
    "Google Trip Data",
    "Kiosks_Data.csv",
)
kiosk_data = pd.read_csv(kiosk_csv_path)

In [6]:
# Keep the identifying columns, build a "lat,lng" string per kiosk, drop the
# placeholder rows located at 0.0,0.0, and order the rows by that string.
kiosk_data_filterd = (
    kiosk_data[['Kiosk Name', "Latitude", "Longitude", 'Address']]
    .assign(
        Coordinates=lambda frame: frame['Latitude'].astype(str) + ',' + frame['Longitude'].astype(str)
    )
    .loc[lambda frame: frame['Coordinates'] != "0.0,0.0"]
    .sort_values(by='Coordinates')
)

In [7]:
# Display the filtered kiosks, ordered by their "lat,lng" Coordinates string.
kiosk_data_filterd

Unnamed: 0,Kiosk Name,Latitude,Longitude,Address,Coordinates
93,Heartland Helpdesk,39.97233,-75.14500,1144 N 11th St,"39.97233,-75.145"
99,Mahoney State Park,41.03038,-96.31184,Mahoney State Park,"41.03038,-96.31184"
120,Walnut Creek Recreation Area (Papillion),41.13997,-96.06433,11601 S 96th St,"41.13997,-96.06433"
46,36th & Raynor Parkway (Bellevue),41.14896,-95.96814,W Papio Trail,"41.14896,-95.96814"
110,Prairie Queen Recreation Area (Papillion),41.15487,-96.11240,Lincoln Rd,"41.15487,-96.1124"
...,...,...,...,...,...
37,24th & Wirt St,41.28661,-95.94713,3014 N 24th Street,"41.28661,-95.94713"
104,NOTC 31st Ave & Taylor,41.29800,-95.95830,4308 N 31st Ave,"41.298,-95.9583"
100,MCC Fort Bookstore N 32nd St,41.30424,-95.95923,N 32nd St,"41.30424,-95.95923"
101,MCC North 30th St,41.30981,-95.95684,N 30th St,"41.30981,-95.95684"


In [60]:
# "lat,lng" strings in the frame's row order; used as both the origins and
# the destinations of the distance-matrix requests.
coordinates = kiosk_data_filterd.loc[:, 'Coordinates'].values

coordinates

The API response is an origins x destinations matrix:

`res['rows'][i]['elements'][j]` holds the trip from origin `i` to destination `j`.

In [167]:
# Query the distance matrix for every origin/destination pair, one
# BATCH_SIZE x BATCH_SIZE tile per request, and collect one record per pair.
#
# res_dict maps a running integer (``count``) to a record holding the pair's
# coordinates, the addresses returned by the API, and the duration/distance
# values. Pairs whose element status is not 'OK' keep their addresses but get
# NaN for duration and distance.
BATCH_SIZE = 10  # 10 x 10 = 100 pairs per request; presumably sized to the API's per-request element limit — confirm

res_dict = {}

count = 0
if run:
    # Tile start indices must span the whole array. Stopping the range at
    # len(coordinates) - 1 silently drops the final coordinate whenever
    # len(coordinates) % BATCH_SIZE == 1.
    batch_starts = range(0, len(coordinates), BATCH_SIZE)

    for org_start in batch_starts:
        # Slicing clips at the end of the array, so the last tile may be short.
        origins = coordinates[org_start:org_start + BATCH_SIZE]

        for dest_start in batch_starts:
            destinations = coordinates[dest_start:dest_start + BATCH_SIZE]

            # Use the configured ``mode`` so the request always matches the
            # column and file names that are derived from it.
            res = gmaps.distance_matrix(origins, destinations, mode=mode, departure_time=time_compute)

            ### result is in shape of org x dest
            ### Result is in res['rows'][org]['elements'][dest]
            for org, origin in enumerate(origins):
                for dest, destination in enumerate(destinations):
                    element = res['rows'][org]['elements'][dest]
                    # Only elements with status 'OK' are read for duration/distance.
                    is_ok = element['status'] == 'OK'

                    res_dict[count] = {
                        "Destination_Coordinates": destination,
                        "Origin_Coordinates": origin,
                        "Origin_Address": res['origin_addresses'][org],
                        "Destination_Address": res['destination_addresses'][dest],
                        "Duration (In Seconds)": element['duration']['value'] if is_ok else np.nan,
                        "Distance (In Meters)": element['distance']['value'] if is_ok else np.nan,
                    }
                    count += 1




                

In [213]:
# Convert the collected records to a dataframe with one row per
# origin/destination pair. orient='index' turns each res_dict key into a row
# directly, so every column keeps its own inferred dtype; building the frame
# column-wise and transposing it would leave all columns as object dtype.
df_res = pd.DataFrame.from_dict(res_dict, orient='index')

In [214]:
# Travel time in minutes, derived from the duration in seconds.
duration_seconds = df_res['Duration (In Seconds)']
df_res[column_name_for_time_at_this_time] = duration_seconds / 60

In [215]:
# Write the results under the folder named by OUTPUT_PATH.
output_path = os.path.join(os.environ['OUTPUT_PATH'], output_file_name)
df_res.to_parquet(output_path)