# Food Delivery System Data Analysis

This notebook provides a comprehensive analysis of food delivery data, including:
- Revenue trends and patterns
- Delivery performance metrics
- Customer satisfaction analysis
- Payment method preferences
- Geographic and temporal patterns


## 1. Import Libraries and Load Data


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
# Suppress all Python warnings so cell output stays uncluttered.
# NOTE(review): this blanket filter also hides deprecation warnings from
# pandas/matplotlib/seaborn — confirm that is intended.
warnings.filterwarnings('ignore')

# Set style
# Seaborn "whitegrid" theme plus a default figure size of 12x6 inches; cells
# that pass their own `figsize` override this default.
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline


In [None]:
# Load data
# Read the order-level CSV and parse `order_date` into datetimes in one chain,
# so later cells can use the `.dt` accessor and min/max on real timestamps.
df = pd.read_csv('../data/food_delivery_data.csv').assign(
    order_date=lambda frame: pd.to_datetime(frame['order_date'])
)

# Sanity summary: number of rows and the time span covered by the data.
first_order, last_order = df['order_date'].min(), df['order_date'].max()
print(f"Data loaded: {len(df):,} records")
print(f"Date range: {first_order} to {last_order}")

# Preview the first rows as the cell's displayed output.
df.head()


In [None]:
# Basic statistics
# Descriptive summary produced by pandas' default `describe()`; the resulting
# table is the cell's displayed output.
df.describe()


## 2. Revenue Analysis


In [None]:
# Total revenue
# Headline figures over the whole dataset.
# NOTE(review): the total is computed from `total_amount` while the average
# uses `order_value` — presumably two distinct columns; confirm both exist and
# that the mix is intentional.
total_revenue = df['total_amount'].sum()
avg_order_value = df['order_value'].mean()
order_count = len(df)

summary_lines = [
    f"Total Revenue: ${total_revenue:,.2f}",
    f"Average Order Value: ${avg_order_value:.2f}",
    f"Total Orders: {order_count:,}",
]
print("\n".join(summary_lines))


In [None]:
# Daily revenue trend
# Sum `total_amount` per calendar day. Grouping by `order_date.dt.date`
# collapses each timestamp to its date, so the resulting Series is indexed by
# day. (The original line was missing the `groupby` call and did not parse.)
daily_revenue = df.groupby(df['order_date'].dt.date)['total_amount'].sum()

# Line chart of revenue over time; wider than the default figure so the date
# axis has room.
plt.figure(figsize=(14, 6))
plt.plot(daily_revenue.index, daily_revenue.values, linewidth=2, color='steelblue')
plt.title('Daily Revenue Trend', fontsize=16, fontweight='bold')
plt.xlabel('Date', fontsize=12)
plt.ylabel('Revenue ($)', fontsize=12)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


In [None]:
# Revenue by day of week
# Mean `total_amount` per weekday, displayed in calendar order (Monday first).
# NOTE(review): this is the mean order amount on each weekday, not the mean of
# daily revenue totals — confirm the "Average Revenue" labels match the intent.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
weekday_means = df.groupby('day_of_week')['total_amount'].mean()
# Reindex only onto weekdays that actually occur, so no NaN bars are introduced.
revenue_by_day = weekday_means.reindex(
    [day for day in day_order if day in weekday_means.index]
)

bar_positions = range(len(revenue_by_day))
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(bar_positions, revenue_by_day.values, color='coral')
ax.set_xticks(bar_positions)
ax.set_xticklabels(revenue_by_day.index, rotation=45)
ax.set_xlabel('Day of Week', fontsize=12)
ax.set_ylabel('Average Revenue ($)', fontsize=12)
ax.set_title('Average Revenue by Day of Week', fontsize=16, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()


In [None]:
# Top restaurants by revenue
# Total `total_amount` per restaurant, keeping the ten largest (descending).
restaurant_revenue = df.groupby('restaurant')['total_amount'].sum()
top_restaurants = restaurant_revenue.nlargest(10)

bar_positions = range(len(top_restaurants))
fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(bar_positions, top_restaurants.values, color='steelblue')
ax.set_yticks(bar_positions)
ax.set_yticklabels(top_restaurants.index)
ax.set_xlabel('Total Revenue ($)', fontsize=12)
ax.set_title('Top 10 Restaurants by Revenue', fontsize=16, fontweight='bold')
# Flip the y-axis so the highest-revenue restaurant is drawn at the top.
ax.invert_yaxis()
ax.grid(True, alpha=0.3, axis='x')
fig.tight_layout()
plt.show()


## 3. Delivery Performance Analysis


In [None]:
# Delivery time statistics
# Central tendency and spread of `delivery_time_minutes`.
delivery_minutes = df['delivery_time_minutes']
print(f"Mean Delivery Time: {delivery_minutes.mean():.2f} minutes")
print(f"Median Delivery Time: {delivery_minutes.median():.2f} minutes")
print(f"Standard Deviation: {delivery_minutes.std():.2f} minutes")

# On-time delivery rate
# Share of orders delivered in strictly less than the threshold; an order at
# exactly the threshold, or with a missing delivery time, is not counted as on time.
on_time_threshold = 45
on_time_rate = delivery_minutes.lt(on_time_threshold).mean() * 100
print(f"\nOn-Time Delivery Rate (< {on_time_threshold} min): {on_time_rate:.2f}%")


In [None]:
# Delivery time distribution
# 30-bin histogram of delivery times with a dashed marker at the mean.
delivery_minutes = df['delivery_time_minutes']
mean_minutes = delivery_minutes.mean()

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(delivery_minutes, bins=30, edgecolor='black', alpha=0.7, color='skyblue')
ax.axvline(
    mean_minutes,
    color='red',
    linestyle='--',
    linewidth=2,
    label=f'Mean: {mean_minutes:.1f} min',
)
ax.set_title('Delivery Time Distribution', fontsize=16, fontweight='bold')
ax.set_xlabel('Delivery Time (minutes)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.legend()
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()


## 4. Customer Satisfaction Analysis


In [None]:
# Rating distribution
# Count of orders per distinct rating value, ordered by rating.
rating_counts = df['rating'].value_counts().sort_index()
# Percentages are relative to all rows in `df`; `value_counts` skips missing
# ratings, so the shares may sum to less than 100% if any rating is missing.
total_orders = len(df)

print("Rating Distribution:")
for stars, n_orders in rating_counts.items():
    share = n_orders / total_orders * 100
    print(f"  {stars} stars: {n_orders:,} ({share:.2f}%)")

print(f"\nAverage Rating: {df['rating'].mean():.2f}/5.0")


In [None]:
# Rating distribution visualization
# Bar chart of `rating_counts` (computed in the previous cell), one colour per
# bar taken in order from the palette.
# NOTE(review): the five-colour palette presumably maps to ratings 1-5 — confirm.
colors = ['#d62728', '#ff7f0e', '#ffbb78', '#2ca02c', '#1f77b4']

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(rating_counts.index, rating_counts.values, color=colors[:len(rating_counts)])
ax.set_xlabel('Rating (stars)', fontsize=12)
ax.set_ylabel('Number of Orders', fontsize=12)
ax.set_title('Rating Distribution', fontsize=16, fontweight='bold')
# Put a tick at every rating value that occurs in the data.
ax.set_xticks(rating_counts.index)
ax.grid(True, alpha=0.3, axis='y')
fig.tight_layout()
plt.show()


## 5. Payment Method Analysis


In [None]:
# Payment method distribution
# Orders per payment method, most frequent first.
payment_counts = df['payment_method'].value_counts()
# Share of all rows per method; kept for inspection — the pie chart below
# derives its own percentage labels via `autopct`.
payment_percentages = payment_counts.div(len(df)).mul(100)

fig, ax = plt.subplots(figsize=(10, 6))
ax.pie(
    payment_counts.values,
    labels=payment_counts.index,
    autopct='%1.1f%%',
    startangle=90,
)
ax.set_title('Payment Method Distribution', fontsize=16, fontweight='bold')
# Equal aspect ratio so the pie is drawn as a circle.
ax.axis('equal')
fig.tight_layout()
plt.show()


## 6. Key Insights


In [None]:
# Recap of the headline numbers, recomputed directly from `df` so this cell
# does not depend on variables created by earlier analysis cells.
banner = "=" * 60
print(banner)
print("KEY INSIGHTS")
print(banner)

revenue_sum = df['total_amount'].sum()
mean_delivery_minutes = df['delivery_time_minutes'].mean()
mean_rating = df['rating'].mean()
# `mode()` returns the most frequent value(s); take the first one.
top_payment_method = df['payment_method'].mode()[0]
# City with the most rows (orders) and restaurant with the largest revenue sum.
busiest_city = df.groupby('city').size().idxmax()
top_restaurant = df.groupby('restaurant')['total_amount'].sum().idxmax()

print(f"\n1. Total Revenue: ${revenue_sum:,.2f}")
print(f"2. Average Delivery Time: {mean_delivery_minutes:.1f} minutes")
print(f"3. Average Customer Rating: {mean_rating:.2f}/5.0")
print(f"4. Most Popular Payment Method: {top_payment_method}")
print(f"5. Top City by Orders: {busiest_city}")
print(f"6. Top Restaurant by Revenue: {top_restaurant}")
