In [2]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [3]:
ORS_API_KEY = os.getenv("ORS_API_KEY")

In [7]:
import openrouteservice
from openrouteservice import convert

def get_route_and_distance(start_coords, end_coords, api_key):
  client = openrouteservice.Client(key=api_key)  # Use your API key here

  # Define the start and end points
  coords = (start_coords, end_coords)

  try:
    # Request directions
    routes = client.directions(coords)

    # Extract distance and duration
    # distance = routes['routes'][0]['summary']['distance'] / 1000  # Convert meters to kilometers
    distance = routes['routes'][0]['summary']['distance'] # Distance in meters
    duration = routes['routes'][0]['summary']['duration'] / 3600  # Convert seconds to hours

    print(f"Distance: {distance:.2f} m")
    print(f"Estimated Duration: {duration:.2f} hours")

    # For detailed route steps, uncomment the following lines:
    # for step in routes['routes'][0]['segments'][0]['steps']:
    #     print(step['instruction'])

  except openrouteservice.exceptions.ApiError as e:
    print(f"An error occurred: {e}")

# Example usage
api_key = ORS_API_KEY
start_coords = (3.2766339, 6.6010417)  # Longitude, Latitude for New York (Empire State Building)
end_coords = (3.3916154, 6.4501069)  # Longitude, Latitude for Los Angeles (Los Angeles City Hall)
get_route_and_distance(start_coords, end_coords, api_key)

Distance: 27578.30 m
Estimated Duration: 0.38 hours


### Using url ORS API instead of library

In [12]:
import requests
import json

# Your ORS API key
api_key = ORS_API_KEY

# ORS directions endpoint
# Use Cycling-regular profile
url = 'https://api.openrouteservice.org/v2/directions/cycling-regular'

# Request headers including the API key
headers = {
  'Authorization': api_key,
  'Content-Type': 'application/json'
}

# Request parameters
# Example: routing from New York City (longitude, latitude) to Boston
params = {
  'start': '3.2766339, 6.6010417',
  'end': '3.3916154, 6.4501069'
}

# Make the GET request
response = requests.get(url, headers=headers, params=params)

# Check if the request was successful
if response.status_code == 200:
  # Parse the JSON response
  directions = response.json()
  print(json.dumps(directions, indent=2))
else:
  print(f"Error: {response.status_code}")


{
  "type": "FeatureCollection",
  "bbox": [
    3.276634,
    6.449919,
    3.391523,
    6.603127
  ],
  "features": [
    {
      "bbox": [
        3.276634,
        6.449919,
        3.391523,
        6.603127
      ],
      "type": "Feature",
      "properties": {
        "segments": [
          {
            "distance": 30579.5,
            "duration": 6711.7,
            "steps": [
              {
                "distance": 35.2,
                "duration": 9.1,
                "type": 11,
                "instruction": "Head east",
                "name": "-",
                "way_points": [
                  0,
                  1
                ]
              },
              {
                "distance": 234.7,
                "duration": 59.5,
                "type": 0,
                "instruction": "Turn left",
                "name": "-",
                "way_points": [
                  1,
                  7
                ]
              },
              {
       

### Using ORS installed locally

In [13]:
import requests
import json

# Use Cycling-regular profile
url = 'http://localhost:8080/ors/v2/directions/cycling-road?'

start_coords = [3.2766339, 6.6010417]  # Latitude, Longitude
end_coords = [3.3916154, 6.4501069]

params = {
  'start': start_coords,
  'end': end_coords
}

# Make the GET request
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
  # Parse the JSON response
  directions = response.json()
  distance = directions['features'][0]['properties']['segments'][0]['distance']
  print(json.dumps(directions, indent=2))
else:
  print(f"Error: {response.status_code}")


Error: 400


In [29]:
distance = directions['features'][0]['properties']['segments'][0]['distance']

print(f"Distance: {distance} meters")

Distance: 1370.9 meters


In [22]:
import requests
import json

# URL of your local ORS API endpoint
base_url = "http://localhost:8080/ors/v2/directions/"

# Coordinates for the origin and destination (replace with your actual values)
start_coords = [3.2766339, 6.6010417]  # Latitude, Longitude
end_coords = [3.3916154, 6.4501069]

# API parameters
profile = "cycling-regular"  # Change this based on your desired routing profile
api_key = ""  # Not required for local instances

# Construct the API URL
url = f"{base_url}{profile}?api_key={api_key}&start={start_coords[1]},{start_coords[0]}&end={end_coords[1]},{end_coords[0]}"

# Send the API request
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()  # Parse the JSON response
    print(json.dumps(data, indent=2))  # Pretty-print the response data
else:
    print(f"Error: {response.status_code}")


Error: 500


#### Create a custom function to get the distance between two points using the ORS API

In [20]:
import requests
import numpy as np

def calculate_distance_with_ors(start_coords, end_coords, profile='cycling-electric'):
    """
    Calculates the distance between two coordinates using OpenRouteService.

    Parameters:
    - start_coords (str): The starting coordinates in 'longitude,latitude' format.
    - end_coords (str): The ending coordinates in 'longitude,latitude' format.
    - profile (str): The routing profile to use. Default is 'cycling-road'.

    Returns:
    - distance (float): The distance in meters between the two coordinates.
    - None if there is an error.
    """
    url = f'http://localhost:8080/ors/v2/directions/{profile}?'
    params = {
        'start': start_coords,
        'end': end_coords
    }

    try:
        response = requests.get(url, params=params)
        response.raise_for_status()  # Raises an HTTPError if the response code was unsuccessful
        directions = response.json()
        distance = directions['features'][0]['properties']['segments'][0]['distance']
        return distance
    except requests.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return np.nan  # Return np.nan on error
    except Exception as err:
        print(f"An error occurred: {err}")
        return np.nan  # Return np.nan on error
    return None

In [21]:
start_coords = '3.2766339,6.60104170'  # Latitude, Longitude
end_coords = '3.3916154,6.45010690'

distance = calculate_distance_with_ors(start_coords, end_coords)
print(distance)

30060.4


#### Preprocess completed orders onlu

In [1]:
import os
os.chdir("../../")
import pandas as pd
from src import display_df
from src import GokadaDataPreprocessor

In [2]:
completed_orders_path = 'data/completed_orders.csv'
drivers_location_path = 'data/drivers_location_during_request.csv'

preprocessor = GokadaDataPreprocessor(completed_orders_path, drivers_location_path)


In [3]:
completed_orders_preprocesseed = pd.read_csv('data/clean/completed_orders_preprocessed.csv')


In [4]:
drivers_location_preprocessed = preprocessor.preprocess_drivers_location()

In [5]:
# df['driver_location'] = df['drivers_lng'].astype(str) + ',' + df['drivers_lat'].astype(str)

In [6]:
display_df(drivers_location_preprocessed)

| id   | order_id   | driver_id   | driver_action   | drivers_lat   | drivers_lng   | driver_location     |
|:-----|:-----------|:------------|:----------------|:--------------|:--------------|:--------------------|
| 1    | 392001     | 243828      | accepted        | 6.60221       | 3.27046       | 3.2704649,6.6022066 |
| 2    | 392001     | 243588      | rejected        | 6.5921        | 3.28744       | 3.2874447,6.5920972 |
| 3    | 392001     | 243830      | rejected        | 6.59613       | 3.28178       | 3.2817841,6.5961334 |
| 4    | 392001     | 243539      | rejected        | 6.59614       | 3.28053       | 3.2805263,6.5961416 |
| 5    | 392001     | 171653      | rejected        | 6.60923       | 3.2888        | 3.2887999,6.6092317 |


In [13]:
# Convert 'order_id' to string
completed_orders_preprocesseed['order_id'] = completed_orders_preprocesseed['order_id'].astype(str)

In [11]:
drivers_location_preprocessed.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1557740 entries, 0 to 1557739
Data columns (total 7 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   id               1557740 non-null  int64  
 1   order_id         1557740 non-null  object 
 2   driver_id        1557740 non-null  int64  
 3   driver_action    1557740 non-null  object 
 4   drivers_lat      1557740 non-null  float64
 5   drivers_lng      1557740 non-null  float64
 6   driver_location  1557740 non-null  object 
dtypes: float64(2), int64(2), object(3)
memory usage: 83.2+ MB


In [22]:
# Merge the two dataframes on order_id column
merged_df = pd.merge(completed_orders_preprocesseed, drivers_location_preprocessed, on='order_id', how='inner')

In [23]:
display_df(merged_df)

| order_id   | trip_origin         | trip_destination    | trip_start_time     | trip_end_time       | day_of_week   | hour_of_day   | day_of_month   | month   | trip_start_date   | trip_end_date   | trip_duration   | trip_origin_latitude   | trip_origin_longitude   | trip_destination_latitude   | trip_destination_longitude   | is_holiday   | is_weekend   | id   | driver_id   | driver_action   | drivers_lat   | drivers_lng   | driver_location     |
|:-----------|:--------------------|:--------------------|:--------------------|:--------------------|:--------------|:--------------|:---------------|:--------|:------------------|:----------------|:----------------|:-----------------------|:------------------------|:----------------------------|:-----------------------------|:-------------|:-------------|:-----|:------------|:----------------|:--------------|:--------------|:--------------------|
| 392001     | 3.2766339,6.6010417 | 3.3916154,6.4501069 | 2021-07-01 09:30:59 | 2021-07-01 09

In [29]:
# get first 10 rows from df using copy
# df_sample = merged_df.head(10).copy()
df = merged_df.copy()

In [30]:
df.loc[:, 'driver_distance'] = df.apply(lambda row: calculate_distance_with_ors(row['driver_location'], row['trip_origin']), axis=1)

HTTP error occurred: 404 Client Error:  for url: http://localhost:8080/ors/v2/directions/cycling-electric?start=3.3321234%2C6.4273972&end=3.3152318%2C6.468257599999999
HTTP error occurred: 400 Client Error:  for url: http://localhost:8080/ors/v2/directions/cycling-electric?start=3.5295703%2C6.4250837&end=4.155025323217046%2C8.226834883140889
HTTP error occurred: 400 Client Error:  for url: http://localhost:8080/ors/v2/directions/cycling-electric?start=3.3107229%2C6.4604495&end=1%2C1
HTTP error occurred: 404 Client Error:  for url: http://localhost:8080/ors/v2/directions/cycling-electric?start=3.4430598%2C6.5427418&end=3.3884795%2C6.5377541


In [32]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1557740 entries, 0 to 1557739
Data columns (total 25 columns):
 #   Column                      Non-Null Count    Dtype  
---  ------                      --------------    -----  
 0   order_id                    1557740 non-null  object 
 1   trip_origin                 1557740 non-null  object 
 2   trip_destination            1557740 non-null  object 
 3   trip_start_time             1557740 non-null  object 
 4   trip_end_time               1557740 non-null  object 
 5   day_of_week                 1557740 non-null  object 
 6   hour_of_day                 1557740 non-null  int64  
 7   day_of_month                1557740 non-null  int64  
 8   month                       1557740 non-null  object 
 9   trip_start_date             1557740 non-null  object 
 10  trip_end_date               1557740 non-null  object 
 11  trip_duration               1557740 non-null  float64
 12  trip_origin_latitude        1557740 non-null  float64
 1

In [33]:
df.to_csv("data/clean/merged_df_with_driver_distance.csv", index=False)

In [8]:
import requests
import numpy as np
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed

def calculate_distance_with_ors_concurrent(start_end_pairs, profile='cycling-electric'):
    """
    Calculates distances for multiple pairs of start and end coordinates concurrently.

    Parameters:
    - start_end_pairs (list of tuples): Each tuple contains start and end coordinates in 'longitude,latitude' format.
    - profile (str): The routing profile to use. Default is 'cycling-electric'.

    Returns:
    - List of distances in meters between the coordinates pairs.
    """
    url = 'http://localhost:8080/ors/v2/directions/{}?'.format(profile)
    distances = [np.nan] * len(start_end_pairs)  # Initialize distances with np.nan

    def fetch_distance(start_coords, end_coords):
        params = {'start': start_coords, 'end': end_coords}
        try:
            response = requests.get(url, params=params)
            response.raise_for_status()
            directions = response.json()
            return directions['features'][0]['properties']['segments'][0]['distance']
        except Exception:
            return np.nan

    with ThreadPoolExecutor() as executor:
        future_to_index = {executor.submit(fetch_distance, pair[0], pair[1]): index for index, pair in enumerate(start_end_pairs)}
        for future in as_completed(future_to_index):
            index = future_to_index[future]
            try:
                distances[index] = future.result()
            except Exception as exc:
                print(f'Generated an exception: {exc}')
                distances[index] = np.nan

    return distances

# Example usage with DataFrame
start_end_pairs = [(row['trip_origin'], row['trip_destination']) for index, row in df.iterrows()]
distances = calculate_distance_with_ors_concurrent(start_end_pairs)
df['trip_distance'] = distances

In [None]:
display_df(df)

| order_id   | trip_origin                        | trip_destination                | trip_start_time     | trip_end_time       | day_of_week   | hour_of_day   | day_of_month   | month   | trip_start_date   | trip_end_date   | trip_duration   | trip_origin_latitude   | trip_origin_longitude   | trip_destination_latitude   | trip_destination_longitude   | trip_distance   |
|:-----------|:-----------------------------------|:--------------------------------|:--------------------|:--------------------|:--------------|:--------------|:---------------|:--------|:------------------|:----------------|:----------------|:-----------------------|:------------------------|:----------------------------|:-----------------------------|:----------------|
| 391996     | 3.37740316890347,6.508813001668548 | 3.3450307,6.650969799999999     | 2021-07-01 07:28:04 | 2021-07-01 07:29:37 | Thursday      | 7             | 1              | July    | 2021-07-01        | 2021-07-01      | 93              | 3.377