In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Read the retail sales CSV into `data`, then print a short preview and the frame summary.
# NOTE(review): the path below is an absolute, machine-specific location.

csv_path = "C:/Users/HP/Desktop/Vaibhav/retail_sales_dataset.csv"
data = pd.read_csv(csv_path)

print("---Data loaded successfully---")
print(data.head())  # plain-text preview of the leading rows

print("---Data Information---")
data.info()  # info() prints its own report, so no print() wrapper is needed

In [None]:
# Descriptive statistics for the loaded dataset

print("---Summary Statistics---")
summary_stats = data.describe()
summary_stats  # last expression of the cell, so the notebook displays it

In [None]:
# Handle Missing Values
# Report the number of nulls per column, then remove every row that has at least one.
print("---Check missing values---")
print(data.isnull().sum())
print("---Drop missing values (If any)---")
rows_before = len(data)
# dropna() returns a new frame and leaves the original untouched, so the result
# must be bound back to `data` for the following cells to see the cleaned rows.
data = data.dropna()
# Print a one-line summary instead of dumping the whole frame into the output.
print(f"Dropped {rows_before - len(data)} row(s) with missing values; {len(data)} row(s) remain.")

In [None]:
# Parse the "Date" column into pandas datetime values

date_column = data["Date"]
data["Date"] = pd.to_datetime(date_column)

In [None]:
# Derive a monthly period ("M" frequency) from each date and store it in "Month".
# Only the "Month" column is added here; no separate year column is created.

sale_dates = data["Date"]
data["Month"] = sale_dates.dt.to_period("M")

In [None]:
# Monthly revenue trend: sum of "Total Amount" per "Month", drawn as a line chart

amount_by_month = data.groupby("Month")["Total Amount"]
monthly_revenue = amount_by_month.sum()

# Draw on an explicit Axes rather than relying on pyplot's current figure.
fig, ax = plt.subplots(figsize=(20, 10))
monthly_revenue.plot(ax=ax, kind="line", marker="o", color="blue")
ax.set_title("Monthly Revenue Trend")
ax.set_xlabel("Month")
ax.set_ylabel("Revenue")
ax.grid(True)
plt.show()

In [None]:
# Revenue per product category, largest first, shown as a bar chart

revenue_by_category = data.groupby("Product Category")["Total Amount"].sum()
category_sales = revenue_by_category.sort_values(ascending=False)

# Draw on an explicit Axes rather than relying on pyplot's current figure.
fig, ax = plt.subplots(figsize=(7, 5))
category_sales.plot(ax=ax, kind="bar", color="green")
ax.set_title("Revenue by Product Category")
ax.set_xlabel("Product Category")
ax.set_ylabel("Revenue")
plt.show()

In [None]:
# Revenue split by the "Gender" column, shown as a bar chart

amount_by_gender = data.groupby("Gender")["Total Amount"]
gender_sales = amount_by_gender.sum()

# Draw on an explicit Axes rather than relying on pyplot's current figure.
fig, ax = plt.subplots(figsize=(5, 4))
bar_colors = ["black", "orange"]
gender_sales.plot(ax=ax, kind="bar", color=bar_colors)
ax.set_title("Revenue by Gender")
ax.set_xlabel("Gender")
ax.set_ylabel("Revenue")
plt.show()

In [None]:
# Age Group Analysis
# Bucket each row by the "Age" column and total "Total Amount" per bucket.

# Edges are left-closed (right=False): [0, 18), [18, 31), [31, 46), [46, 61), [61, inf).
# With the default right-closed bins and an edge at 18, an 18-year-old landed in
# "<18" and "18-30" really meant 19-30; these edges make every label match its range.
# The open-ended top edge keeps ages above 100 in "60+" instead of turning them
# into NaN, which would drop their revenue from the totals.
# NOTE(review): labels read naturally for whole-number ages — confirm "Age" is integral.
bins = [0, 18, 31, 46, 61, float("inf")]
labels = ["<18", "18-30", "31-45", "46-60", "60+"]
data["Age Group"] = pd.cut(data["Age"], bins=bins, labels=labels, right=False)
# observed=False keeps age groups that have no rows in the result.
age_group_sales = data.groupby("Age Group", observed=False)["Total Amount"].sum()
plt.figure(figsize=(8, 5))
age_group_sales.plot(kind="bar", color="purple")
plt.title("Revenue by Age Group")
plt.xlabel("Age Group")
plt.ylabel("Revenue")
plt.show()

In [None]:
# Ten customers with the highest summed "Total Amount", printed and charted

revenue_by_customer = data.groupby("Customer ID")["Total Amount"].sum()
ranked_customers = revenue_by_customer.sort_values(ascending=False)
top_customers = ranked_customers.head(10)
print("\nTop 10 Customers by Revenue:\n", top_customers)

# Draw on an explicit Axes rather than relying on pyplot's current figure.
fig, ax = plt.subplots(figsize=(10, 5))
top_customers.plot(ax=ax, kind="bar", color="orange")
ax.set_title("Top 10 Customers by Revenue")
ax.set_xlabel("Customer ID")
ax.set_ylabel("Revenue")
plt.show()

In [None]:
# Average Order Value (AOV): the mean of the "Total Amount" column
order_totals = data["Total Amount"]
aov = order_totals.mean()
print(f"\n💰 Average Order Value (AOV): {aov:.2f}")