In [11]:
import pandas as pd

# Load datasets
# Every source workbook is read from the same absolute, machine-specific
# folder (note the space before ".xlsx" in each file name). The reads run in
# the order listed and are unpacked positionally, so the name order on the
# left must match the path order below. employee_data is loaded but not used
# by the code shown in this cell.
product_data, employee_data, sales_data, customer_data = (
    pd.read_excel(workbook_path)
    for workbook_path in (
        r"C:\Users\Project Manager\Downloads\New folder (2)\Product .xlsx",
        r"C:\Users\Project Manager\Downloads\New folder (2)\employee .xlsx",
        r"C:\Users\Project Manager\Downloads\New folder (2)\sales .xlsx",
        r"C:\Users\Project Manager\Downloads\New folder (2)\Customer .xlsx",
    )
)

# Step 1: Data Integration
# Both joins are inner joins, so a sales row is kept only when its product_id
# and its customer_id each have a match in the corresponding lookup table.

# Attach product attributes to every sale.
sales_product = sales_data.merge(product_data, how='inner', on='product_id')

# Attach customer attributes to the product-enriched sales.
sales_customer_product = sales_product.merge(customer_data, how='inner', on='customer_id')

# Step 2: Add Derived Fields
# Revenue per sales row: units sold multiplied by the unit price.
sales_customer_product['total_sale_amount'] = (
    sales_customer_product['quantity'] * sales_customer_product['price']
)

# NOTE: despite its name, 'profit_margin' holds an absolute profit amount
# (revenue minus quantity * cost_price), not a percentage or ratio.
sales_customer_product['profit_margin'] = (
    sales_customer_product['total_sale_amount']
    - sales_customer_product['quantity'] * sales_customer_product['cost_price']
)

# Add date dimensions
# Coerce to datetime before using the .dt accessor: .dt raises AttributeError
# when the 'date' column was read as text instead of real date cells. The
# conversion goes into a temporary so the stored 'date' column (and therefore
# the exported CSV) is left exactly as loaded.
sale_dates = pd.to_datetime(sales_customer_product['date'])
sales_customer_product['year'] = sale_dates.dt.year
sales_customer_product['month'] = sale_dates.dt.month
sales_customer_product['quarter'] = sale_dates.dt.quarter

# Step 3: Data Loading
# Write the fully joined and enriched sales table to disk, without the
# DataFrame's row index.
sales_customer_product.to_csv("comprehensive_sales_report.csv", index=False)

# Step 4: Aggregated Summary Report
# One row per product category, carrying the summed revenue and the summed
# profit amount; 'category' is turned back into a regular column afterwards.
category_totals = sales_customer_product.groupby('category').agg(
    total_sale_amount=('total_sale_amount', 'sum'),
    profit_margin=('profit_margin', 'sum'),
)
category_summary = category_totals.reset_index()

# Write the per-category summary to disk, again without the row index.
category_summary.to_csv("category_summary_report.csv", index=False)

# Analysis Examples
# Top-selling products: total revenue per product name, highest first,
# limited to five entries.
# NOTE(review): grouping is by display name, so distinct products that share a
# name would be combined into one row - confirm product_name is unique.
top_products = (
    sales_customer_product.groupby('product_name')['total_sale_amount']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

# Key customers: total revenue per customer name, highest first, limited to
# five entries.
# NOTE(review): grouping is by 'name', so two customers with the same name
# would be merged into a single entry - confirm names are unique or group by
# customer_id instead.
top_customers = (
    sales_customer_product.groupby('name')['total_sale_amount']
    .sum()
    .sort_values(ascending=False)
    .head(5)
)

# Sales trends over time: total revenue per (year, month) pair, returned as a
# flat table with 'year' and 'month' as ordinary columns.
sales_trends = (
    sales_customer_product.groupby(['year', 'month'])['total_sale_amount']
    .sum()
    .reset_index()
)

# Display insights
# sep="" stops print() from inserting its default space between the heading
# (which already ends in "\n") and the data. That space otherwise indents the
# first output line and shifts the DataFrame header one column out of
# alignment with its rows.
print("Top 5 Products by Sales:\n", top_products, sep="")
print("\nTop 5 Customers by Purchases:\n", top_customers, sep="")
print("\nSales Trends Over Time:\n", sales_trends, sep="")

# Save additional reports if needed
sales_trends.to_csv("sales_trends.csv", index=False)



Top 5 Products by Sales:
 product_name
Bluetooth Speaker    9499.05
LED Desk Lamp        4649.07
Wireless Mouse       2729.09
Notebook Set         1439.10
Name: total_sale_amount, dtype: float64

Top 5 Customers by Purchases:
 name
Jackson Carter    999.90
William Young     799.92
Jacob Adams       599.94
Matthew Clark     599.94
Michael Brown     547.92
Name: total_sale_amount, dtype: float64

Sales Trends Over Time:
    year  month  total_sale_amount
0  2024      9            4215.14
1  2024     10            4677.06
2  2024     11            4541.10
3  2024     12            4293.09
4  2025      1             589.92
