In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load datasets: read the raw completed-orders and delivery-requests CSVs, in that order.
df_completed_orders, df_delivery_requests = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/raw/completed_orders.csv',
        '../data/raw/delivery_requests.csv',
    )
)

In [None]:
# Distribution of trip start times
# The column comes out of read_csv as plain strings, so parse it into a local
# datetime Series first; the histogram/KDE is then computed along a real time
# axis rather than on raw timestamp strings. The frame itself is left untouched,
# and re-parsing is a no-op if the column has already been converted.
trip_start_times = pd.to_datetime(df_completed_orders['Trip Start Time']).dropna()

plt.figure(figsize=(12, 6))
sns.histplot(trip_start_times, bins=30, kde=True)
plt.title('Distribution of Trip Start Times')
plt.xticks(rotation=45)
plt.show()


In [None]:
# Distribution of trip end times
# The column comes out of read_csv as plain strings, so parse it into a local
# datetime Series first; the histogram/KDE is then computed along a real time
# axis rather than on raw timestamp strings. The frame itself is left untouched,
# and re-parsing is a no-op if the column has already been converted.
trip_end_times = pd.to_datetime(df_completed_orders['Trip End Time']).dropna()

plt.figure(figsize=(12, 6))
sns.histplot(trip_end_times, bins=30, kde=True)
plt.title('Distribution of Trip End Times')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Box plot of trip duration
# Convert both timestamp columns to datetimes (stored back on the frame), then
# derive each trip's length in minutes as a new 'trip_duration' column.
df_completed_orders['Trip Start Time'] = pd.to_datetime(
    df_completed_orders['Trip Start Time']
)
df_completed_orders['Trip End Time'] = pd.to_datetime(
    df_completed_orders['Trip End Time']
)
elapsed = df_completed_orders['Trip End Time'] - df_completed_orders['Trip Start Time']
df_completed_orders['trip_duration'] = elapsed.dt.total_seconds() / 60  # seconds -> minutes

fig, ax = plt.subplots(figsize=(12, 6))
sns.boxplot(data=df_completed_orders, x='trip_duration', ax=ax)
ax.set_title('Box Plot of Trip Duration')
plt.show()

In [None]:
# Count plot of driver actions, bars ordered by how often each action occurs
action_order = df_delivery_requests['driver_action'].value_counts().index

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df_delivery_requests, y='driver_action', order=action_order, ax=ax)
ax.set_title('Count of Driver Actions')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Calculate completion rates
# For each weather subset: rows with a non-null 'Trip ID' divided by all rows in the subset.
# NOTE(review): df_orders is not created in the cells visible here — confirm it is
# defined (with 'rain' and 'Trip ID' columns) before this cell runs.
rainy_orders = df_orders[df_orders['rain'] == 1]
dry_orders = df_orders[df_orders['rain'] == 0]
rain_completion_rate = rainy_orders['Trip ID'].count() / len(rainy_orders)
no_rain_completion_rate = dry_orders['Trip ID'].count() / len(dry_orders)

# Plot the two rates side by side on a fixed 0-1 scale
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(
    x=['Rainy', 'No Rain'],
    y=[rain_completion_rate, no_rain_completion_rate],
    ax=ax,
)
ax.set_title('Order Completion Rate: Rainy vs No Rain Days')
ax.set_ylabel('Completion Rate')
ax.set_ylim(0, 1)
plt.show()


In [None]:
# Completion rate per group = share of that group's rows with a non-null 'Trip ID'.
# The denominator has to be each group's own row count; dividing by the number of
# distinct groups would produce a scaled count, not a rate between 0 and 1.
# NOTE(review): df_orders is not created in the cells visible here — confirm it is
# defined before this cell runs.
has_trip_id = df_orders['Trip ID'].notna()

# Traffic Condition
traffic_completion_rate = has_trip_id.groupby(df_orders['traffic_condition']).mean()

# Hour of the Day
hourly_completion_rate = has_trip_id.groupby(df_orders['start_hour']).mean()

# Weekend vs Weekday
weekend_orders = df_orders[df_orders['weekend'] == 1]
weekday_orders = df_orders[df_orders['weekend'] == 0]
weekend_completion_rate = weekend_orders['Trip ID'].count() / len(weekend_orders)
weekday_completion_rate = weekday_orders['Trip ID'].count() / len(weekday_orders)

# Plotting examples (customize as per your data and preferences)
# Example: Traffic Condition
plt.figure(figsize=(10, 6))
sns.barplot(x=traffic_completion_rate.index, y=traffic_completion_rate.values)
plt.title('Order Completion Rate by Traffic Condition')
plt.xlabel('Traffic Condition')
plt.ylabel('Completion Rate')
plt.ylim(0, 1)  # rates are fractions; same scale as the other completion-rate plots
plt.show()

# Example: Hour of the Day
plt.figure(figsize=(10, 6))
sns.lineplot(x=hourly_completion_rate.index, y=hourly_completion_rate.values)
plt.title('Order Completion Rate by Hour of the Day')
plt.xlabel('Hour of the Day')
plt.ylabel('Completion Rate')
plt.ylim(0, 1)
plt.xticks(rotation=45)
plt.show()

# Example: Weekend vs Weekday
plt.figure(figsize=(8, 6))
sns.barplot(x=['Weekend', 'Weekday'], y=[weekend_completion_rate, weekday_completion_rate])
plt.title('Order Completion Rate: Weekend vs Weekday')
plt.ylabel('Completion Rate')
plt.ylim(0, 1)
plt.show()


In [None]:
import datashader as ds
import datashader.transfer_functions as tf
import matplotlib.pyplot as plt
import pandas as pd

# Load datasets: the cluster-annotated versions of both tables.
# NOTE(review): this rebinds df_completed_orders / df_delivery_requests, which an
# earlier cell loaded from ../data/raw/ — cells re-run after this one will see
# these frames instead.
df_completed_orders, df_delivery_requests = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/completed_orders_with_clusters.csv',
        '../data/delivery_requests_with_clusters.csv',
    )
)

# Plot delivery starting locations
# Aggregate the (lng, lat) points into per-pixel counts on an 800x800 canvas,
# then shade the counts on a light-blue -> dark-blue ramp.
cvs = ds.Canvas(plot_width=800, plot_height=800)
agg = cvs.points(df_delivery_requests, x='lng', y='lat', agg=ds.count())
img = tf.shade(agg, cmap=["lightblue", "darkblue"])

# Display the image without axis ticks or frame
fig, ax = plt.subplots()
ax.imshow(img.to_pil())
ax.axis('off')
plt.show()

# Plot delivery clusters: one small marker per request, coloured by its 'cluster' value
fig, ax = plt.subplots(figsize=(10, 6))
scatter = ax.scatter(
    x=df_delivery_requests['lng'],
    y=df_delivery_requests['lat'],
    c=df_delivery_requests['cluster'],
    s=0.5,
    cmap='viridis',
)

# Build the legend from the scatter's own colour mapping and pin it to the axes
handles, labels = scatter.legend_elements()
legend = ax.legend(handles, labels, title="Clusters")
ax.add_artist(legend)

ax.set_xlabel('Longitude')
ax.set_ylabel('Latitude')
ax.set_title('Delivery Start Location Clusters')
plt.show()
