In [1]:
from geopy.geocoders import Nominatim
import pandas as pd
import requests
import os
from dotenv import load_dotenv

In [2]:
# Project layout, resolved relative to the directory the kernel was started in:
# <parent of cwd>/data/{raw,interim,processed}
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

data_folder = os.path.join(parent_dir, "data")
raw_data_folder, interim_data_folder, processed_data_folder = (
    os.path.join(data_folder, stage) for stage in ("raw", "interim", "processed")
)

# Input: sampled ride records.
raw_data_file = os.path.join(raw_data_folder, 'rawSampledData.csv')

# Interim artefacts read or written by this notebook.
# NOTE(review): rides_with_eta_dir ends in ".csv" but is written with
# DataFrame.to_parquet further down - confirm which format readers expect.
unique_combo_dir, ride_locations_dir, rides_with_eta_dir = (
    os.path.join(interim_data_folder, file_name)
    for file_name in (
        'unique_combo.csv',
        "ride_locations.csv",
        "rides_with_etas_rush_hour.csv",
    )
)


In [3]:
# Pull variables from a .env file into the process environment.
load_dotenv()

# Read the Mapbox token from the environment and fail fast when it is absent:
# os.getenv returns None for a missing variable, which would otherwise only
# surface later as one failed API request per row.
mapbox_api_key = os.getenv('MAPBOX_API_KEY')
if not mapbox_api_key:
    raise RuntimeError(
        "MAPBOX_API_KEY is not set; define it in the environment or in a .env file."
    )

In [4]:
# Load the sampled rides, parse 'datetime' and truncate every timestamp
# down to the start of its hour.
df = pd.read_csv(raw_data_file).assign(
    datetime=lambda rides: pd.to_datetime(rides['datetime']).dt.floor('h')
)

# One row per distinct (source, destination, hour) combination.
unique_combos = df.loc[:, ["source", "destination", "datetime"]].drop_duplicates()

# # Save unique combinations as a Parquet file
# unique_combos_dir = os.path.join(interim_data_folder, "unique_combo_2.parquet")
# unique_combos.to_parquet(unique_combos_dir, index=False)

In [5]:
# Load the coordinate lookup that the following merge joins on source/destination.
coordinates = pd.read_csv(ride_locations_dir)

In [6]:
# Attach origin/destination coordinates to every (source, destination, hour) row.
route_keys = ["source", "destination"]
coordinate_columns = ["source_lat", "source_long", "destination_lat", "destination_long"]

# Keep a single coordinate row per route: duplicate keys in the lookup would
# fan out the left merge, and every extra row is an extra API call later on.
route_coordinates = (
    coordinates[route_keys + coordinate_columns]
    .drop_duplicates(subset=route_keys)
)

unique_combos = (
    unique_combos
    # Drop coordinate columns left over from a previous run of this cell so
    # re-running it does not produce suffixed *_x / *_y duplicates.
    .drop(columns=coordinate_columns, errors="ignore")
    .merge(route_coordinates, on=route_keys, how="left")
)

In [7]:
# Inspect the merged frame (route, hour and the four coordinate columns).
unique_combos

Unnamed: 0,source,destination,datetime,source_lat,source_long,destination_lat,destination_long
0,Theatre District,Fenway,2018-12-13 20:00:00,42.351900,-71.064300,42.345187,-71.104599
1,Beacon Hill,Haymarket Square,2018-12-02 09:00:00,42.358708,-71.067829,42.362950,-71.057845
2,Northeastern University,North Station,2018-11-28 21:00:00,42.338954,-71.088058,42.365282,-71.060150
3,Fenway,Back Bay,2018-12-13 05:00:00,42.345187,-71.104599,42.350707,-71.079730
4,Northeastern University,Beacon Hill,2018-12-02 09:00:00,42.338954,-71.088058,42.358708,-71.067829
...,...,...,...,...,...,...,...
22598,South Station,West End,2018-12-02 15:00:00,42.352508,-71.054945,42.363919,-71.063899
22599,Northeastern University,Financial District,2018-12-03 20:00:00,42.338954,-71.088058,42.355838,-71.055616
22600,Fenway,West End,2018-12-14 05:00:00,42.345187,-71.104599,42.363919,-71.063899
22601,Beacon Hill,Northeastern University,2018-11-30 04:00:00,42.358708,-71.067829,42.338954,-71.088058


In [8]:
# Render the hour as 'YYYY-MM-DDTHH:00' text, the form later passed to get_eta
# as depart_at. Coercing with pd.to_datetime first keeps the cell re-runnable:
# on a second run the column already holds strings and has no .dt accessor.
unique_combos['datetime'] = (
    pd.to_datetime(unique_combos['datetime']).dt.strftime('%Y-%m-%dT%H:00')
)

In [9]:
# Check that 'datetime' now holds 'YYYY-MM-DDTHH:00' strings.
unique_combos

Unnamed: 0,source,destination,datetime,source_lat,source_long,destination_lat,destination_long
0,Theatre District,Fenway,2018-12-13T20:00,42.351900,-71.064300,42.345187,-71.104599
1,Beacon Hill,Haymarket Square,2018-12-02T09:00,42.358708,-71.067829,42.362950,-71.057845
2,Northeastern University,North Station,2018-11-28T21:00,42.338954,-71.088058,42.365282,-71.060150
3,Fenway,Back Bay,2018-12-13T05:00,42.345187,-71.104599,42.350707,-71.079730
4,Northeastern University,Beacon Hill,2018-12-02T09:00,42.338954,-71.088058,42.358708,-71.067829
...,...,...,...,...,...,...,...
22598,South Station,West End,2018-12-02T15:00,42.352508,-71.054945,42.363919,-71.063899
22599,Northeastern University,Financial District,2018-12-03T20:00,42.338954,-71.088058,42.355838,-71.055616
22600,Fenway,West End,2018-12-14T05:00,42.345187,-71.104599,42.363919,-71.063899
22601,Beacon Hill,Northeastern University,2018-11-30T04:00,42.358708,-71.067829,42.338954,-71.088058


In [None]:
def get_eta(source_lat, source_long, destination_lat, destination_long, mode='driving', depart_at=None, timeout=30):
    """Return the travel time in minutes between two points via the Mapbox Directions API.

    Parameters
    ----------
    source_lat, source_long : float
        Latitude and longitude of the origin.
    destination_lat, destination_long : float
        Latitude and longitude of the destination.
    mode : str, default 'driving'
        Routing profile placed in the URL after ``mapbox/``.
    depart_at : str, optional
        Departure time forwarded as the ``depart_at`` query parameter.
        ``None``, an empty string and NaN/NaT are treated as "not provided".
    timeout : float, default 30
        Seconds to wait for the HTTP request before giving up.

    Returns
    -------
    float or None
        Duration of the first returned route, converted from seconds to
        minutes and rounded to two decimals. ``None`` (after printing a
        message) when a coordinate is missing, the request fails, the status
        code is not 200, or the response contains no routes.
    """
    endpoints = (source_lat, source_long, destination_lat, destination_long)

    # Missing coordinates would otherwise be interpolated into the URL as
    # "nan"/"None" and spend an API call on a request that cannot succeed.
    if any(pd.isna(value) for value in endpoints):
        print(f"Error: missing coordinates {source_lat}, {source_long} to {destination_lat}, {destination_long}")
        return None

    # Mapbox expects "longitude,latitude" pairs separated by ';'.
    url = (
        f"https://api.mapbox.com/directions/v5/mapbox/{mode}/"
        f"{source_long},{source_lat};{destination_long},{destination_lat}"
    )

    # Only the duration of the first route is read below, so route geometry,
    # turn-by-turn steps and alternative routes are not requested.
    params = {
        'access_token': mapbox_api_key,
        'overview': 'false',
        'steps': 'false',
        'alternatives': 'false',
    }

    # NaN is truthy, so a plain `if depart_at:` would send "nan" to the API.
    if depart_at is not None and not pd.isna(depart_at) and depart_at != '':
        params['depart_at'] = depart_at

    # A timeout plus explicit error handling keeps one bad request from
    # hanging or aborting a long row-by-row run.
    try:
        response = requests.get(url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"API request failed for coordinates {source_lat}, {source_long} to {destination_lat}, {destination_long}: {exc}")
        return None

    if response.status_code != 200:
        print(f"API request failed with status code {response.status_code}: {response.text}")
        return None

    try:
        data = response.json()
    except ValueError as exc:
        print(f"API response was not valid JSON: {exc}")
        return None

    routes = data.get('routes') if isinstance(data, dict) else None
    if not routes:
        print(f"Error: 'routes' not found in response for coordinates {source_lat}, {source_long} to {destination_lat}, {destination_long}")
        return None

    # 'duration' is in seconds; report minutes.
    travel_time_minutes = routes[0]['duration'] / 60
    return round(travel_time_minutes, 2)


In [11]:
# Smoke test: a single route with an explicit departure time.
get_eta(
    42.3519, -71.0643,      # source latitude, longitude
    42.345187, -71.104599,  # destination latitude, longitude
    mode='driving',
    depart_at='2018-12-13T20:40',
)

15.58

In [12]:
# Row-wise lookup: one get_eta call per row, using that row's coordinates and
# its 'datetime' value as the departure time. Default routing mode is used.
unique_combos['eta_minutes'] = unique_combos.apply(
    lambda ride: get_eta(
        source_lat=ride['source_lat'],
        source_long=ride['source_long'],
        destination_lat=ride['destination_lat'],
        destination_long=ride['destination_long'],
        depart_at=ride['datetime'],
    ),
    axis=1,
)

In [14]:
# Persist the frame right after the ETA lookup; at this point it still carries
# the coordinate columns. A later cell writes to this same path again.
# NOTE(review): rides_with_eta_dir ends in ".csv" while the content is Parquet -
# confirm which format downstream readers expect.
unique_combos.to_parquet(rides_with_eta_dir)

In [None]:
# Remove the coordinate columns before the final save. errors="ignore" keeps
# the cell re-runnable: on a second run the columns are already gone and a
# plain drop would raise KeyError.
unique_combos = unique_combos.drop(
    columns=['source_lat', 'source_long', 'destination_lat', 'destination_long'],
    errors="ignore",
)

In [17]:
# Final write to the same path as the earlier save, replacing that file with
# the current state of unique_combos.
# NOTE(review): the target path ends in ".csv" but the file is Parquet - confirm.
unique_combos.to_parquet(rides_with_eta_dir)