In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.model_selection import train_test_split

In [None]:
# Read the delivery records; the .txt file is parsed with read_csv's default
# comma separator, then the first rows are shown as a quick sanity check.
data_path = 'data/delivery_time.txt'
df = pd.read_csv(data_path)
df.head()

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame
df.info()

In [None]:
# Count missing values per column (the Series is displayed as the cell output)
df.isnull().sum()

In [None]:
# Earth's radius in kilometers; scales the haversine central angle to a distance
R = 6371


def deg_to_rad(degrees):
    """Convert an angle from degrees to radians.

    Works element-wise on NumPy arrays / pandas Series as well as on scalars.
    """
    return degrees * (np.pi / 180)


def calculate_distance(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points via the haversine formula.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float or array-like
        Distance in the same unit as ``R`` (kilometers). Array-like inputs
        are processed element-wise, so whole columns can be passed at once.
    """
    d_lat = deg_to_rad(lat2 - lat1)
    d_lon = deg_to_rad(lon2 - lon1)
    a = (np.sin(d_lat / 2) ** 2
         + np.cos(deg_to_rad(lat1)) * np.cos(deg_to_rad(lat2))
         * np.sin(d_lon / 2) ** 2)
    # Mathematically a <= 1, but floating-point rounding can push it marginally
    # above 1 for (near-)antipodal points, which would turn sqrt(1 - a) into
    # NaN. Cap it at 1; NaN inputs still propagate as NaN.
    a = np.minimum(a, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c


# Compute the restaurant -> delivery-location distance for every row at once.
# calculate_distance is built from NumPy element-wise functions, so it accepts
# whole columns. This replaces a Python loop of per-cell .loc lookups, which
# was slow and only correct when the frame had a default 0..n-1 index.
df['distance'] = calculate_distance(df['Restaurant_latitude'],
                                    df['Restaurant_longitude'],
                                    df['Delivery_location_latitude'],
                                    df['Delivery_location_longitude'])

In [None]:
# Preview the frame again to confirm the new 'distance' column is populated
df.head()

In [None]:
# Delivery time against travel distance; marker size also encodes delivery time.
# trendline='ols' overlays a least-squares fit (plotly needs statsmodels for it).
fig = px.scatter(
    df,
    x='distance',
    y='Time_taken(min)',
    size='Time_taken(min)',
    trendline='ols',
    title='Relationship Between Distance and Time Taken',
    width=800,
    height=400,
)
fig.show()

In [None]:
# Delivery time against the delivery partner's age; color encodes distance and
# marker size encodes delivery time, with a least-squares trendline overlaid.
fig = px.scatter(
    df,
    x='Delivery_person_Age',
    y='Time_taken(min)',
    color='distance',
    size='Time_taken(min)',
    trendline='ols',
    title='Relationship Between Age and Time Taken',
    width=800,
    height=400,
)
fig.show()

In [None]:
# Delivery time against the delivery partner's rating; color encodes distance
# and marker size encodes delivery time, with a least-squares trendline overlaid.
fig = px.scatter(
    df,
    x='Delivery_person_Ratings',
    y='Time_taken(min)',
    color='distance',
    size='Time_taken(min)',
    trendline='ols',
    title='Relationship Between Ratings and Time Taken',
    width=800,
    height=400,
)
fig.show()

In [None]:
# Distribution of delivery time per vehicle type, split by order type.
# A title is set so this figure stands alone like the scatter plots do.
fig = px.box(data_frame=df,
             x='Type_of_vehicle',
             y='Time_taken(min)',
             color='Type_of_order',
             title='Time Taken by Type of Vehicle and Type of Order',
             width=800,
             height=400)
fig.show()

In [None]:
# Model inputs: partner age, partner rating and trip distance, in that order.
feature_columns = ['Delivery_person_Age', 'Delivery_person_Ratings', 'distance']
# Target: delivery time; selected with a list so y stays 2-D (one column).
target_columns = ['Time_taken(min)']

X = np.array(df[feature_columns])
y = np.array(df[target_columns])

In [None]:
# Hold out 10% of the rows for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.10, random_state=7)

In [None]:
# Two stacked LSTM layers followed by two dense layers ending in a single
# regression output. The first LSTM declares its input as
# (number of feature columns, 1), i.e. each feature is fed as one time step.
# NOTE(review): X_train is 2-D (rows, features) while this declares 3-D input;
# confirm the installed Keras version adds the trailing axis automatically.
model = Sequential([
    LSTM(units=128,
         return_sequences=True,  # pass the full sequence on to the next LSTM
         input_shape=(X_train.shape[1], 1)),
    LSTM(units=64, return_sequences=False),
    Dense(units=32),
    Dense(units=1),
])

In [None]:
# Train with Adam on mean squared error. batch_size=1 performs one weight
# update per sample; verbose=0 suppresses the per-epoch progress output.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=X_train, y=y_train, epochs=10, batch_size=1, verbose=0)

In [None]:
# Example inputs: partner age, rating of previous deliveries, trip distance
a = 29
b = 2.9
c = 6

print(f"       Age of Delivery Partner: {a}")
print(f"Ratings of Previous Deliveries: {b}")
print(f"                Total Distance: {c}")

# A single row in the same column order used to build X (age, rating, distance)
features = np.array([[a, b, c]])
predicted = model.predict(features, verbose=0)[0, 0]
# int() drops the fractional part of the predicted minutes
print(f"Predicted Delivery Time in Minutes = {int(predicted)} mins")