In [1]:
import pandas as pd

# ==========================================
# DAY 1 & 2: Setup, Load & Explore
# ==========================================
# Read the raw sales records from the CSV file in the working directory.
df = pd.read_csv('sales_data.csv')

# Summarise the dataset: dimensions, then per-column dtypes and non-null counts.
print("--- Dataset Overview ---")
print(f"Shape: {df.shape}")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()

# ==========================================
# DAY 3: Data Cleaning
# ==========================================
# Drop every row that contains at least one missing value. The number of
# removed rows is measured from the frame length before and after the drop:
# counting null *cells* (df.isnull().sum().sum()) over-reports whenever a row
# has more than one missing field, so it must not be used as a row count.
rows_before_dropna = len(df)
df = df.dropna()
rows_removed = rows_before_dropna - len(df)
if rows_removed > 0:
    print(f"\nRemoved {rows_removed} rows with missing values.")

# Remove exact duplicate rows (all columns equal), keeping the first occurrence.
df = df.drop_duplicates()

# ==========================================
# DAY 4: Analyze Sales (Calculations)
# ==========================================

# Metric 1: Total Revenue -- sum of the Total_Sales column over all rows.
total_revenue = df['Total_Sales'].sum()

# Metric 2: Best-Selling Product (by Quantity)
# Aggregate units per product once and reuse the result for both the winning
# label and its unit count, instead of running the same groupby twice.
units_by_product = df.groupby('Product')['Quantity'].sum()
best_selling_product = units_by_product.idxmax()
total_units_sold = units_by_product.max()

# Metric 3: Average Order Value (AOV) -- mean of Total_Sales per row.
# NOTE(review): this treats each row as one order -- confirm against the data.
average_order_value = df['Total_Sales'].mean()

# Metric 4: Sales Performance by Region -- revenue per region, highest first.
regional_performance = df.groupby('Region')['Total_Sales'].sum().sort_values(ascending=False)

# ==========================================
# DAY 5: Create Report
# ==========================================
# Assemble the report as an ordered list of text lines and emit them one by
# one; the printed output is the header banner, the three headline metrics,
# the per-region revenue table, and the closing banner.
separator = "=" * 30

report_lines = [
    "\n" + separator,
    "     SALES ANALYSIS REPORT     ",
    separator,
    f"Total Revenue:         ${total_revenue:,.2f}",
    f"Average Order Value:   ${average_order_value:,.2f}",
    f"Best Selling Product:  {best_selling_product} ({total_units_sold} units)",
    "\n--- Revenue by Region ---",
    regional_performance.to_string(),
    "\n" + separator,
    "         END OF REPORT         ",
]

for report_line in report_lines:
    print(report_line)

--- Dataset Overview ---
Shape: (100, 7)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Date         100 non-null    object
 1   Product      100 non-null    object
 2   Quantity     100 non-null    int64 
 3   Price        100 non-null    int64 
 4   Customer_ID  100 non-null    object
 5   Region       100 non-null    object
 6   Total_Sales  100 non-null    int64 
dtypes: int64(3), object(4)
memory usage: 5.6+ KB
None

     SALES ANALYSIS REPORT     
Total Revenue:         $12,365,048.00
Average Order Value:   $123,650.48
Best Selling Product:  Laptop (136 units)

--- Revenue by Region ---
Region
North    3983635
South    3737852
East     2519639
West     2123922

         END OF REPORT         


In [None]:
📋 Key Metrics Explained
Total Revenue: The sum of all values in the Total_Sales column. This represents the gross income generated.

Best-Selling Product: Calculated by grouping data by Product and summing the Quantity. It identifies which item has the highest market demand.

Average Order Value (AOV): Calculated as the mean of the Total_Sales column, treating each row as one order. It indicates how much a customer spends per transaction on average.

Regional Performance: Shows which geographical area is driving the most revenue, which is critical for targeted marketing.