# 📊 Project #1: Simple Retail Sales Analysis

**Author:** CraftedCoder  
**Date:** October 20, 2025  
**Skill Level:** Beginner (Basic pandas and matplotlib only)  

## 🎯 What We'll Learn
- Load data from CSV files
- Clean messy data
- Calculate basic business metrics
- Create simple charts
- Generate insights from data

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Notebook banner: the project title followed by a 40-character rule.
print("🎯 Project #1: Simple Retail Sales Analysis", "=" * 40, sep="\n")

## Step 1: Load the Data 📥

In [None]:
# Read the sales CSV into a DataFrame and report how big the table is
print("📥 Loading data...")
df = pd.read_csv('sales_data.csv')
print("✅ Data loaded!")

# df.shape is (row count, column count)
print(f"Rows: {df.shape[0]}")
print(f"Columns: {df.shape[1]}")

## Step 2: Look at the Data 👀

In [None]:
# Preview the top of the table (head() returns the first 5 rows by default)
print("👀 First 5 rows:")
preview = df.head(5)
print(preview)

In [None]:
# List the column labels as a plain Python list
print("Column names:")
column_names = df.columns.to_list()
print(column_names)

In [None]:
# Get basic information about the data
# df.info() writes its summary directly to stdout and returns None, so wrapping
# it in print() would emit a stray "None" line after the summary. Call it
# directly instead.
print("Data info:")
df.info()

## Step 3: Check for Problems 🔍

In [None]:
# Count the missing (NaN/None) entries in each column
print("🔍 Checking for missing data...")
print("Missing values:")
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Show the dtype pandas assigned to each column
print("Data types:", df.dtypes, sep="\n")

## Step 4: Clean the Data 🧹

In [None]:
# First cleaning step: discard rows that have no CustomerID
print("🧹 Cleaning data...")

# Report the row count before and after so the effect of the filter is visible
print(f"Before cleaning: {len(df)}")
df_clean = df.dropna(subset=['CustomerID'])
print(f"After removing missing CustomerID: {len(df_clean)}")

In [None]:
# Keep only rows whose UnitPrice is strictly positive.
# The `> 0` comparison drops negative prices as well as zeros.
has_positive_price = df_clean['UnitPrice'].gt(0)
df_clean = df_clean[has_positive_price]
print(f"After removing zero prices: {len(df_clean)}")

In [None]:
# Add total price column (Quantity * UnitPrice for each row)
# df_clean was produced by filtering another DataFrame, so assigning a new
# column in place can trigger pandas' SettingWithCopyWarning. assign() returns
# a new DataFrame that includes the extra column, which avoids the warning.
df_clean = df_clean.assign(
    TotalPrice=df_clean['Quantity'] * df_clean['UnitPrice']
)
print("✅ Added TotalPrice column")

## Step 5: Basic Analysis 📊

In [None]:
# Headline business metrics computed from the cleaned rows
print("📊 Basic Analysis")

# One TotalPrice value per row of df_clean; reused for the sum and the mean
row_totals = df_clean['TotalPrice']

# Total sales: the sum of every row's TotalPrice
total_sales = row_totals.sum()
print(f"Total Sales: £{total_sales:.2f}")

# Number of orders: the row count of df_clean.
# NOTE(review): if a single order can span several rows in the CSV, this counts
# line items rather than orders — confirm against the file's schema.
num_orders = df_clean.shape[0]
print(f"Number of Orders: {num_orders}")

# Number of customers: distinct CustomerID values
num_customers = df_clean['CustomerID'].nunique()
print(f"Number of Customers: {num_customers}")

# Average order value: mean TotalPrice per row (same caveat as num_orders)
avg_order = row_totals.mean()
print(f"Average Order: £{avg_order:.2f}")

## Step 6: Simple Grouping 📈

In [None]:
# Total TotalPrice per Country, then show the largest entries
print("📈 Sales by Country")
sales_grouped_by_country = df_clean.groupby('Country')
country_sales = sales_grouped_by_country['TotalPrice'].sum()

print("Top 5 Countries:")
country_ranking = country_sales.sort_values(ascending=False)
print(country_ranking.head(5))

In [None]:
# Total Quantity per product Description, then show the largest entries
print("🛍️ Top Products")
sales_grouped_by_product = df_clean.groupby('Description')
product_sales = sales_grouped_by_product['Quantity'].sum()

print("Most Sold Products:")
product_ranking = product_sales.sort_values(ascending=False)
print(product_ranking.head(5))

## Step 7: Create Simple Charts 📊

In [None]:
# Chart 1: vertical bar chart of the (up to) five countries with the highest sales
print("📊 Creating simple charts...")

plt.figure(figsize=(10, 6))
top_countries = country_sales.sort_values(ascending=False).head()

# Bars sit at integer positions 0..n-1; the same positions carry the country labels
bar_positions = range(len(top_countries))
plt.bar(bar_positions, top_countries.values)
plt.xticks(bar_positions, top_countries.index, rotation=45)

plt.ylabel('Sales (£)')
plt.title('Sales by Top 5 Countries')
plt.show()

In [None]:
# Chart 2: Order value distribution
# Restrict the histogram itself to the £0–£500 window. Without `range`, the 30
# bins are spread over the full span of TotalPrice, so a few very large (or
# negative) values make every bin far wider than the visible window and the
# chart collapses into one or two bars.
plt.figure(figsize=(8, 6))
plt.hist(df_clean['TotalPrice'], bins=30, range=(0, 500))
plt.title('Order Value Distribution')
plt.xlabel('Order Value (£)')
plt.ylabel('Number of Orders')
plt.xlim(0, 500)  # Keep the axis aligned with the binned range
plt.show()

In [None]:
# Chart 3: horizontal bar chart of the (up to) five products with the most units sold
plt.figure(figsize=(10, 6))
top_products = product_sales.sort_values(ascending=False).head()

# Bars sit at integer positions 0..n-1; the same positions carry the product labels
bar_positions = range(len(top_products))
plt.barh(bar_positions, top_products.values)
plt.yticks(bar_positions, top_products.index)

plt.xlabel('Quantity Sold')
plt.title('Top 5 Products by Quantity Sold')
plt.show()

## Step 8: Save Results 💾

In [None]:
# Write the cleaned table to disk; index=False leaves the row index out of the file
print("💾 Saving results...")
clean_output_path = 'clean_sales_data.csv'
df_clean.to_csv(clean_output_path, index=False)
print("✅ Clean data saved!")

In [None]:
# Build a two-column (Metric, Value) summary table and write it to summary.csv
metric_names = [
    'Total Sales',
    'Number of Orders',
    'Number of Customers',
    'Average Order',
]
# Monetary values are pre-formatted as strings; the counts stay numeric
metric_values = [
    f'£{total_sales:.2f}',
    num_orders,
    num_customers,
    f'£{avg_order:.2f}',
]
summary_data = {'Metric': metric_names, 'Value': metric_values}
summary_df = pd.DataFrame(summary_data)
summary_df.to_csv('summary.csv', index=False)
print("✅ Summary saved!")

## 🎉 Final Results

In [None]:
# Closing recap of the headline numbers.
# Relies on total_sales, num_customers and top_countries from earlier cells
# (top_countries is created in the Chart 1 cell).
print("🎉 Analysis complete!")
print("Key findings:")
print(f"- Total sales: £{total_sales:.2f}")

# top_countries is sorted in descending order, so its first label is the leader
top_country = top_countries.index[0]
print(f"- Top country: {top_country}")

# Total sales spread evenly over the distinct customers
spend_per_customer = total_sales / num_customers
print(f"- Average customer spends: £{spend_per_customer:.2f}")