In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt 

In [2]:
# Load the raw training CSV (path is relative to the notebook's working directory).
data = pd.read_csv(
    "Data/finalTrain.csv"
)

In [3]:
# Work on a copy so the frame read from disk (`data`) is left untouched.
df = data.copy()

In [4]:
# Split the frame positionally into the feature columns and the target column:
#   X -> every column except the last one
#   y -> the last column, kept as a one-column DataFrame (slice `-1:`, not `-1`)
X = df.iloc[:, :-1]
y = df.iloc[:, -1:]

In [5]:
# Partition the feature column names by dtype:
# everything that is not `object` is treated as numerical, `object` columns as categorical.
numerical_cols = X.select_dtypes(exclude='object').columns
categorical_cols = X.select_dtypes(include='object').columns

In [6]:
numerical_cols

Index(['Delivery_person_Age', 'Delivery_person_Ratings', 'Restaurant_latitude',
       'Restaurant_longitude', 'Delivery_location_latitude',
       'Delivery_location_longitude', 'Vehicle_condition',
       'multiple_deliveries'],
      dtype='object')

In [7]:
categorical_cols

Index(['ID', 'Delivery_person_ID', 'Order_Date', 'Time_Orderd',
       'Time_Order_picked', 'Weather_conditions', 'Road_traffic_density',
       'Type_of_order', 'Type_of_vehicle', 'Festival', 'City'],
      dtype='object')

In [8]:
# Category labels for Road_traffic_density, in this fixed order.
# NOTE(review): presumably ordered from lightest to heaviest traffic for an
# ordinal encoding -- confirm where this list is consumed.
Road_traffic_density_cat = [
    'Low',
    'Medium',
    'High',
    'Jam',
]

In [9]:
from sklearn.base import BaseEstimator,TransformerMixin


In [10]:
class DistanceCalculator(TransformerMixin, BaseEstimator):
    """Stateless transformer that turns restaurant/delivery coordinates into a distance.

    ``transform`` reads the columns ``Restaurant_latitude``,
    ``Restaurant_longitude``, ``Delivery_location_latitude`` and
    ``Delivery_location_longitude`` (in degrees) from the input DataFrame and
    returns a one-column DataFrame named ``distance`` holding the haversine
    (great-circle) distance in kilometres for each row.

    Inheriting from ``BaseEstimator`` as well as ``TransformerMixin`` supplies
    ``get_params``/``set_params`` so the transformer can be cloned and used
    inside scikit-learn pipelines and model-selection utilities.
    """

    # Set the earth's radius (in kilometers)
    R = 6371

    def __init__(self):
        # No hyper-parameters to store.
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` for chaining."""
        return self

    def transform(self, X):
        """Compute the restaurant -> delivery-location distance for every row.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain the four coordinate columns listed in the class
            docstring. Any index is accepted; rows are processed by position.

        Returns
        -------
        pandas.DataFrame
            A single float column ``distance`` (kilometres) that shares the
            index of ``X``.
        """
        # Pull each column out as a plain float array so the whole computation
        # is vectorised and does not depend on the labels in X.index
        # (a positional `X.loc[i, ...]` loop breaks on any non 0..n-1 index).
        distances = self._calculate_distance(
            X['Restaurant_latitude'].to_numpy(dtype=float),
            X['Restaurant_longitude'].to_numpy(dtype=float),
            X['Delivery_location_latitude'].to_numpy(dtype=float),
            X['Delivery_location_longitude'].to_numpy(dtype=float),
        )

        # Keep the caller's index so the result stays aligned with X.
        return pd.DataFrame({'distance': distances}, index=X.index)

    # Convert degrees to radians
    def deg_to_rad(self, degrees):
        """Convert an angle (scalar or array) from degrees to radians."""
        return degrees * (np.pi/180)

    # Function to calculate the distance between two points using the haversine formula
    def _calculate_distance(self, lat1, lon1, lat2, lon2):
        """Haversine distance in kilometres between (lat1, lon1) and (lat2, lon2).

        All arguments are in degrees and may be scalars or equally shaped
        numpy arrays; the result has the same shape as the inputs.
        """
        d_lat = self.deg_to_rad(lat2 - lat1)
        d_lon = self.deg_to_rad(lon2 - lon1)
        a = (
            np.sin(d_lat/2)**2
            + np.cos(self.deg_to_rad(lat1)) * np.cos(self.deg_to_rad(lat2)) * np.sin(d_lon/2)**2
        )
        # Floating-point rounding can push `a` marginally outside [0, 1], which
        # would make sqrt(1 - a) NaN for near-antipodal points.
        a = np.clip(a, 0.0, 1.0)
        c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
        return self.R * c