In [None]:
""" SALES INSIGHTS PROJECT 
- Data loading & inspection

- Basic cleaning

- Simple analysis

- Visualizations

- Drawing insights
"""

In [2]:
import pandas as pd
import matplotlib.pyplot as plt

# Build a small in-memory sample of sales orders.
# Each keyword is a column; position i across the lists is one order.
data = dict(
    OrderID=[101, 102, 103, 104, 105],
    Date=["2023-01-15", "2023-01-16", "2023-01-16", "2023-01-17", "2023-01-18"],
    Product=["Laptop", "Mouse", "Keyboard", "Laptop", "Monitor"],
    Price=[1200, 25, 45, 1150, 300],
    Quantity=[1, 3, 2, 1, 1],
    Region=["West", "East", "West", "North", "East"],
)

# Column order follows the key order above; "Date" is still plain strings here.
df = pd.DataFrame(data)
print(df)

   OrderID        Date   Product  Price  Quantity Region
0      101  2023-01-15    Laptop   1200         1   West
1      102  2023-01-16     Mouse     25         3   East
2      103  2023-01-16  Keyboard     45         2   West
3      104  2023-01-17    Laptop   1150         1  North
4      105  2023-01-18   Monitor    300         1   East


In [None]:
# DATA PREPARATION

# Parse the "Date" strings into datetimes so the .dt accessor is available.
df["Date"] = pd.to_datetime(df["Date"])

# Total sales per order: unit price multiplied by units sold.
df["Total"] = df["Price"].mul(df["Quantity"])

# Full month name of each order date (e.g. "January").
df["Month"] = df["Date"].dt.month_name()

   OrderID       Date   Product  Price  Quantity Region  Total    Month
0      101 2023-01-15    Laptop   1200         1   West   1200  January
1      102 2023-01-16     Mouse     25         3   East     75  January
2      103 2023-01-16  Keyboard     45         2   West     90  January
3      104 2023-01-17    Laptop   1150         1  North   1150  January
4      105 2023-01-18   Monitor    300         1   East    300  January


In [34]:
# HANDLE MISSING DATA (practice)

# Inject missing values on purpose so the fill logic below has work to do.
df.loc[2, "Price"] = None       # add missing price
df.loc[4, "Region"] = None      # add missing region

# Fill missing values.
# Coerce to numeric FIRST, then take the mean of the coerced column: calling
# .mean() on the raw column can raise on exactly the non-numeric entries that
# errors="coerce" is meant to tolerate.
df["Price"] = pd.to_numeric(df["Price"], errors="coerce")
df["Price"] = df["Price"].fillna(df["Price"].mean())

# Label unknown regions explicitly and store the column as a categorical.
df["Region"] = df["Region"].fillna("Unknown").astype("category")

# NOTE(review): "Total" is derived from "Price" in the data-preparation cell
# and is not refreshed here, so a row whose price was imputed keeps its
# earlier "Total". Confirm whether "Total" should be recomputed after filling.

In [None]:
# BASIC ANALYSIS

# Q1: What's the total revenue?
total_revenue = df["Total"].sum()
print(f"Total Revenue: ${total_revenue:,.2f}")

# Q2: Which product sold most units?
# Sum units per product, then take the product label with the largest sum.
# (Computed once; the earlier duplicate call discarded its result.)
best_seller = df.groupby("Product")["Quantity"].sum().idxmax()
print(best_seller)

# Q3: What's the average order value?
avg_order = df["Total"].mean()
print(f"Average Order Value: ${avg_order:,.2f}")

# Q4 Simple Visualizations

# Sales by Product (Bar Chart): one bar per product, height = summed "Total".
sales_by_product = df.groupby("Product")["Total"].sum()
bar_ax = sales_by_product.plot.bar(title="Total Sales by Product", color="skyblue")
bar_ax.set_ylabel("Sales ($)")
plt.show()

# Orders by Region (Pie Chart): slice size = number of orders in each region,
# each slice annotated with its percentage share.
orders_by_region = df["Region"].value_counts()
orders_by_region.plot.pie(title="Orders by Region", autopct="%1.1f%%")
plt.show()

# Q5: Exporting Results

# Write the cleaned order-level table to CSV, leaving out the row index.
df.to_csv("cleaned_sales_data.csv", index=False)

# Per-product summary: total units sold and total sales, indexed by product.
summary = df.groupby("Product")[["Quantity", "Total"]].sum()
summary.to_excel("sales_summary.xlsx")