In [1]:
#import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the dataset and take a first look at it
df = pd.read_csv('pizza_sales.csv')
# A notebook cell only echoes its final expression, so the earlier summaries
# are passed to display() explicitly instead of being silently discarded.
display(df.head(10))
display(df.describe())
display(df.shape)
df.info()  # prints its report directly
df.isnull().sum()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48620 entries, 0 to 48619
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   pizza_id           48620 non-null  int64  
 1   order_id           48620 non-null  int64  
 2   pizza_name_id      48620 non-null  object 
 3   quantity           48620 non-null  int64  
 4   order_date         48620 non-null  object 
 5   order_time         48620 non-null  object 
 6   unit_price         48620 non-null  float64
 7   total_price        48620 non-null  float64
 8   pizza_size         48620 non-null  object 
 9   pizza_category     48620 non-null  object 
 10  pizza_ingredients  48620 non-null  object 
 11  pizza_name         48620 non-null  object 
dtypes: float64(2), int64(3), object(7)
memory usage: 4.5+ MB


pizza_id             0
order_id             0
pizza_name_id        0
quantity             0
order_date           0
order_time           0
unit_price           0
total_price          0
pizza_size           0
pizza_category       0
pizza_ingredients    0
pizza_name           0
dtype: int64

In [None]:
# Preview the first ten rows
df.iloc[:10]

In [None]:
# Count rows whose pizza_id repeats an earlier row's pizza_id
df.duplicated(subset=['pizza_id']).sum()

In [None]:
# Total revenue: sum of total_price over every row of df
total_revenue = df.loc[:, 'total_price'].sum()
print(f'Total Revenue: ${total_revenue:.0f}')

In [None]:
# Average order value: total each order_id first, then average those totals
average_order_value_per_order = (
    df.groupby('order_id')['total_price']
    .sum()    # revenue of each distinct order
    .mean()   # mean across orders
)
print(f'Average Order Value (per distinct order): ${average_order_value_per_order:.0f}')

In [None]:
# Total pizzas sold: sum of the quantity column
total_pizzas_sold = df.loc[:, 'quantity'].sum()
print(f'Total Pizzas Sold: {total_pizzas_sold}')

In [None]:
# Total orders: number of distinct order_id values (missing values are not counted)
total_orders = df['order_id'].nunique(dropna=True)
print(f'Total Orders: {total_orders}')

In [None]:
# Average pizzas per order: total quantity within each order_id, then the mean
average_pizzas_per_order = (
    df.groupby('order_id')['quantity']
    .sum()    # pizzas in each order
    .mean()   # mean across orders
)
print(f'Average Pizzas per Order: {average_pizzas_per_order:.2f}')

In [None]:
# Total sales (sum of total_price) per weekday, ordered Sunday to Saturday.
# NOTE(review): order_date is parsed with pandas' default format inference —
# confirm the day/month order of the CSV matches what pandas infers.
df['order_date'] = pd.to_datetime(df['order_date'])
df['weekday'] = df['order_date'].dt.day_name()
weekday_sales = (
    df.groupby('weekday')['total_price']
    .sum()
    # groupby sorts weekday names alphabetically; reindex puts them in calendar order
    .reindex(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'])
    .reset_index()
)
weekday_sales



In [None]:
# Number of distinct orders per weekday, ordered Sunday to Saturday
df['order_date'] = pd.to_datetime(df['order_date'])
df['weekday'] = df['order_date'].dt.day_name()
weekday_order_sales = (
    df.groupby('weekday')['order_id']
    .nunique()
    # groupby sorts weekday names alphabetically; reindex puts them in calendar order
    .reindex(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'])
    .reset_index()
    .rename(columns={'order_id': 'total_orders'})
)
weekday_order_sales

In [None]:
# Total quantity sold per weekday, ordered Sunday to Saturday.
# Note: this rebinds `weekday_sales`, the same name the total_price-by-weekday
# cell assigns; after this cell it holds quantities, not revenue.
df['order_date'] = pd.to_datetime(df['order_date'])
df['weekday'] = df['order_date'].dt.day_name()
weekday_sales = (
    df.groupby('weekday')['quantity']
    .sum()
    # groupby sorts weekday names alphabetically; reindex puts them in calendar order
    .reindex(['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'])
    .reset_index()
)
weekday_sales


In [None]:
# Distinct orders per calendar month, labelled with the month name.
# The monthly table is built here from df so the cell runs on a fresh kernel
# (Restart & Run All) instead of relying on a `monthly_order_sales` variable
# that no earlier cell defines.
monthly_order_sales = (
    df.assign(order_month=pd.to_datetime(df['order_date']).dt.to_period('M'))
    .groupby('order_month')['order_id']
    .nunique()
    .reset_index(name='total_orders')
)
# NOTE(review): periods are year-month, so data spanning several years yields
# one row per year-month and month names would repeat — confirm a single year.
monthly_order_sales_with_month_name = (
    monthly_order_sales
    .assign(month_name=lambda table: table['order_month'].dt.strftime('%B'))
    .sort_values('order_month')  # chronological order, not alphabetical
)
monthly_order_sales_with_month_name[['month_name', 'total_orders']]

In [None]:
# Number of distinct orders per calendar month.
# `order_month` is not one of the CSV's columns and no earlier cell creates it,
# so derive it here (as a monthly period) when it is missing.
if 'order_month' not in df.columns:
    df['order_month'] = pd.to_datetime(df['order_date']).dt.to_period('M')
month_order_sales = (
    df.groupby('order_month')['order_id']
    .nunique()
    .reset_index()
    .rename(columns={'order_id': 'total_orders'})
)
month_order_sales

In [None]:
# Total sales per pizza size, highest revenue first
pizza_size_sales = (
    df.groupby('pizza_size')['total_price']
    .sum()
    .reset_index()
    .sort_values(by='total_price', ascending=False)
)
pizza_size_sales


In [None]:

# Horizontal bar chart of total sales per pizza size
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=pizza_size_sales,
    x='total_price',
    y='pizza_size',
    palette='viridis',
    ax=ax,
)
ax.set_title('Total Sales by Pizza Size')
ax.set_xlabel('Total Sales ($)')
ax.set_ylabel('Pizza Size')
plt.show()

In [None]:
# Total quantity sold per pizza category, largest first
pizza_category_sales = (
    df.groupby('pizza_category')['quantity']
    .sum()
    .reset_index()
    .sort_values(by='quantity', ascending=False)
)
pizza_category_sales

In [None]:
# Plot total quantity sold by pizza category.
# The bars are keyed on pizza_category, so the title and y-axis label name
# the category (they previously said "Pizza Size").
plt.figure(figsize=(8, 4))
sns.barplot(x='quantity', y='pizza_category', data=pizza_category_sales, palette='viridis')
plt.title('Total Quantity by Pizza Category')
plt.xlabel('Total quantity')
plt.ylabel('Pizza Category')
plt.show()

In [None]:
# Five pizzas with the highest total revenue
most_revenue = (
    df.groupby('pizza_name')['total_price']
    .sum()
    .reset_index()
    .sort_values(by='total_price', ascending=False)
    .head(5)
)
most_revenue


In [None]:
# Horizontal bar chart of the five highest-revenue pizzas
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=most_revenue,
    x='total_price',
    y='pizza_name',
    palette='viridis',
    ax=ax,
)
ax.set_title('Top 5 Most Revenue Generating Pizzas')
ax.set_xlabel('Total Revenue ($)')
ax.set_ylabel('Pizza Name')
ax.grid(True)
plt.show()


In [None]:
# Five pizzas with the lowest total revenue
worst_revenue = (
    df.groupby('pizza_name')['total_price']
    .sum()
    .reset_index()
    .sort_values(by='total_price', ascending=True)
    .head(5)
)
worst_revenue


In [None]:
# Plot the five lowest-revenue pizzas.
# Uses `worst_revenue` (computed in the cell above). The previous version
# re-sorted and plotted `most_revenue`, i.e. the top five, and overwrote it.
plt.figure(figsize=(10, 6))
sns.barplot(
    x='total_price',
    y='pizza_name',
    data=worst_revenue.sort_values(by='total_price', ascending=True),
    palette='viridis',
)
plt.title('Top 5 Worst Revenue Generating Pizzas')
plt.xlabel('Total Revenue ($)')
plt.ylabel('Pizza Name')
plt.grid(True)
plt.show()

In [None]:
# Five pizzas with the highest total quantity sold
most_pizza_sold = (
    df.groupby('pizza_name')['quantity']
    .sum()
    .reset_index()
    .sort_values(by='quantity', ascending=False)
    .head(5)  # head() defaults to 5 rows; spelled out here
)
most_pizza_sold

In [None]:
# Plot the five pizzas with the highest total quantity sold.
# The x-axis is the summed `quantity` column (pizzas sold), not a count of
# orders, so it is labelled as quantity.
plt.figure(figsize=(10, 6))
most_pizza_sold = most_pizza_sold.sort_values(by='quantity', ascending=False).head(5)
sns.barplot(x='quantity', y='pizza_name', data=most_pizza_sold, palette='viridis')
plt.title('Top 5 Most Orders Pizza Sold')
plt.xlabel('Total Quantity Sold')
plt.ylabel('Pizza Name')
plt.grid(True)
plt.show()

In [None]:
# Five pizzas with the lowest total quantity sold
worst_pizza_sold = (
    df.groupby('pizza_name')['quantity']
    .sum()
    .reset_index()
    .sort_values(by='quantity', ascending=True)
    .head(5)
)
worst_pizza_sold


In [None]:
# Plot the five pizzas with the lowest total quantity sold.
# The x-axis is the summed `quantity` column (pizzas sold), not a count of
# orders, so it is labelled as quantity.
plt.figure(figsize=(10, 6))
worst_pizza_sold = worst_pizza_sold.sort_values(by='quantity', ascending=True).head(5)
sns.barplot(x='quantity', y='pizza_name', data=worst_pizza_sold, palette='viridis')
plt.title('Top 5 Worst Orders Pizza Sold')
plt.xlabel('Total Quantity Sold')
plt.ylabel('Pizza Name')
plt.grid(True)
plt.show()
