In [None]:
"""
NOTEBOOK 4 – VISUALISATIONS & ANALYSE DES KPI
Répond explicitement aux besoins métiers 1 → 10
"""

# ============================================================
# SECTION 1 : IMPORTS & CONFIGURATION
# ============================================================

import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Global look-and-feel for every figure: matplotlib's seaborn-like dark grid
# style combined with seaborn's "husl" colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# All figures are saved under this folder; it is created on first run
# (including any missing parent folders).
OUTPUT_DIR = Path("outputs/figures")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Location of the source Excel workbook. Despite its name, DIR_DATA points at
# the workbook file itself, not at a directory. The default is a
# machine-specific absolute path; set the SALES_XLSX_PATH environment variable
# to run this notebook on another machine without editing the code.
DIR_DATA = Path(
    os.environ.get(
        "SALES_XLSX_PATH",
        r'C:\Users\pc\Desktop\enset\python-project\data\Sales.xlsx',
    )
)

print("✓ Configuration terminée")

# ============================================================
# SECTION 2 : CHARGEMENT DES DONNÉES
# ============================================================

# Sales table read from the processed CSV; OrderDate is converted to a
# datetime column right after loading.
df_sales = pd.read_csv("data/processed/sales_clean.csv")
df_sales['OrderDate'] = pd.to_datetime(df_sales['OrderDate'])

# Pre-computed measure tables (monthly year-over-year and per product).
df_monthly = pd.read_csv("data/processed/measures_monthly_yoy.csv")
df_product = pd.read_csv("data/processed/measures_by_product.csv")

# Read the three reference sheets with a single read_excel call: passing a
# list of sheet names returns a dict of DataFrames keyed by sheet name, so the
# workbook is opened and parsed once instead of once per sheet.
_sheet_names = ("Customers", "Regions", "Products")
_sheets = pd.read_excel(DIR_DATA, sheet_name=list(_sheet_names))
df_customers, df_regions, df_products = (_sheets[name] for name in _sheet_names)

print("✓ Données chargées")

# ============================================================
# SECTION 3 : VARIABLES TEMPORELLES (CY / PY)
# ============================================================

# The current year (CY) is the latest value found in the "Year" column of the
# sales table; the previous year (PY) is the one immediately before it.
CURRENT_YEAR = df_sales['Year'].max()
PREVIOUS_YEAR = CURRENT_YEAR - 1

# One sub-frame per year, in (CY, PY) order.
df_cy, df_py = (
    df_sales.loc[df_sales['Year'].eq(year)]
    for year in (CURRENT_YEAR, PREVIOUS_YEAR)
)

# ============================================================
# SECTION 4 : KPI CARDS (BESOINS 1 → 4)
# ============================================================

# Headline figures for the current year: summed sales and profit, and the
# number of distinct order numbers.
total_sales = df_cy['Sales'].sum()
total_profit = df_cy['Profit'].sum()
total_orders = df_cy['OrderNumber'].nunique()

# Profit as a percentage of sales; reported as 0 when total sales is zero so
# the division is never attempted on an empty/zero total.
if total_sales:
    profit_margin = (total_profit / total_sales) * 100
else:
    profit_margin = 0

# Print the KPI summary, one line per indicator.
kpi_lines = (
    "\n=== KPI PRINCIPAUX ===",
    f"Total Sales : {total_sales:,.2f}",
    f"Total Profit : {total_profit:,.2f}",
    f"Total Orders : {total_orders:,}",
    f"Profit Margin % : {profit_margin:.2f}%",
)
for kpi_line in kpi_lines:
    print(kpi_line)

# ============================================================
# SECTION 5 : VENTES PAR PRODUIT – CY vs PY (BESOIN 5)
# ============================================================

# Grouped bar chart comparing sales per product between the current year (CY)
# and the previous year (PY); saved as sales_by_product_yoy.png.
fig, ax = plt.subplots(figsize=(14,6))

# Total sales per product for each year.
sales_prod_cy = df_cy.groupby("Product Description Index")['Sales'].sum()
sales_prod_py = df_py.groupby("Product Description Index")['Sales'].sum()

# Build the x-axis from the union of both years so that a product sold only
# in PY still gets a bar (with a CY value of 0) instead of being dropped from
# the comparison.
products = sales_prod_cy.index.union(sales_prod_py.index)
x = np.arange(len(products))
width = 0.35

# CY bars sit half a bar-width left of each tick, PY bars half a width right.
ax.bar(x - width/2, sales_prod_cy.reindex(products, fill_value=0), width, label="Sales CY")
ax.bar(x + width/2, sales_prod_py.reindex(products, fill_value=0), width, label="Sales PY")

ax.set_title("Sales by Product – CY vs PY")
ax.set_ylabel("Sales")
ax.set_xticks(x)
# Right-align the rotated labels so each one ends under its own tick.
ax.set_xticklabels(products, rotation=45, ha="right")
ax.legend()

plt.tight_layout()
plt.savefig(OUTPUT_DIR / "sales_by_product_yoy.png", dpi=300)
plt.show()

# ============================================================
# SECTION 6 : VENTES PAR MOIS – CY vs PY (BESOIN 6)
# ============================================================

# Line chart of total sales per calendar month, current year (solid line)
# versus previous year (dashed line); saved as sales_by_month_yoy.png.
months = range(1,13)

monthly_cy = df_cy.groupby("Month")['Sales'].sum()
monthly_py = df_py.groupby("Month")['Sales'].sum()

fig, ax = plt.subplots(figsize=(14,6))

# Each series is reindexed on months 1..12, so a month without data becomes
# NaN and shows up as a gap in the line rather than shifting the other points.
for series, label, extra_style in (
    (monthly_cy, "CY", {}),
    (monthly_py, "PY", {"linestyle": '--'}),
):
    ax.plot(months, series.reindex(months), marker='o', label=label, **extra_style)

ax.set(title="Monthly Sales – CY vs PY", xlabel="Month", ylabel="Sales")
ax.legend()

fig.tight_layout()
fig.savefig(OUTPUT_DIR / "sales_by_month_yoy.png", dpi=300)
plt.show()

# ============================================================
# SECTION 7 : TOP 5 VILLES (BESOIN 7) — FIXED
# ============================================================

# Attach region attributes to every sales row: the sales column
# "Delivery Region Index" is matched against the "Index" column of the
# Regions sheet. A left join keeps sales rows that have no matching region.
df_sales_region = pd.merge(
    df_sales,
    df_regions,
    how="left",
    left_on="Delivery Region Index",
    right_on="Index",
)

# Keep only the rows of the current year.
is_current_year = df_sales_region['Year'] == CURRENT_YEAR
df_sales_region_cy = df_sales_region[is_current_year]

# Total sales per city, then the five largest totals.
# NOTE(review): "City" is presumably supplied by the Regions sheet — confirm.
city_sales = df_sales_region_cy.groupby("City")['Sales'].sum()
top_cities = city_sales.nlargest(5)

# Pie chart of those five cities. The percentages are each city's share of
# the five plotted totals, not of overall sales.
fig, ax = plt.subplots(figsize=(8,8))
ax.pie(
    top_cities.values,
    labels=top_cities.index,
    autopct='%1.1f%%',
    startangle=90,
)
ax.set_title("Top 5 Cities by Sales")

fig.tight_layout()
fig.savefig(OUTPUT_DIR / "top5_cities.png", dpi=300)
plt.show()

print("✓ Graphique Top 5 Cities généré")


# ============================================================
# SECTION 8 : PROFIT PAR CHANNEL – CY vs PY (BESOIN 8)
# ============================================================

# Grouped bar chart comparing profit per sales channel between the current
# year (CY) and the previous year (PY); saved as profit_by_channel_yoy.png.
profit_channel_cy = df_cy.groupby("Channel")['Profit'].sum()
profit_channel_py = df_py.groupby("Channel")['Profit'].sum()

# Put both years in one frame (aligned on channel, 0 where a channel is
# missing in one year). Plotting the two Series separately draws both sets of
# bars at the same x positions, so they overlap instead of sitting side by
# side; a two-column DataFrame is drawn as grouped bars.
profit_channel_yoy = pd.DataFrame(
    {"CY": profit_channel_cy, "PY": profit_channel_py}
).fillna(0)

fig, ax = plt.subplots(figsize=(12,6))
profit_channel_yoy.plot(kind='bar', ax=ax)

ax.set_title("Profit by Channel – CY vs PY")
ax.set_ylabel("Profit")
ax.legend()

plt.tight_layout()
plt.savefig(OUTPUT_DIR / "profit_by_channel_yoy.png", dpi=300)
plt.show()

# ============================================================
# SECTION 9 : TOP 5 CLIENTS – CY vs PY (BESOIN 9)
# ============================================================

# Horizontal grouped bar chart of the five customers with the highest
# current-year sales, each compared with its own previous-year sales; saved
# as top5_customers_yoy.png.
top_clients = df_cy.groupby("Customer Name Index")['Sales'].sum().nlargest(5)
top_clients_py = df_py.groupby("Customer Name Index")['Sales'].sum()

# One frame with a CY and a PY column, in the CY ranking order. A customer
# with no PY sales gets 0. Plotting the two Series separately draws both sets
# of bars at the same y positions, so they overlap; a two-column DataFrame is
# drawn as grouped bars.
top_clients_yoy = top_clients.to_frame("CY")
top_clients_yoy["PY"] = top_clients_py.reindex(top_clients.index).fillna(0)

fig, ax = plt.subplots(figsize=(12,6))
top_clients_yoy.plot(kind='barh', ax=ax)

ax.set_title("Top 5 Customers – CY vs PY")
ax.set_xlabel("Sales")
ax.legend()

plt.tight_layout()
plt.savefig(OUTPUT_DIR / "top5_customers_yoy.png", dpi=300)
plt.show()

# ============================================================
# SECTION 10 : LAST 5 CLIENTS – CY vs PY (BESOIN 10)
# ============================================================

# Horizontal grouped bar chart of the five customers with the lowest
# current-year sales, each compared with its own previous-year sales; saved
# as last5_customers_yoy.png.
last_clients = df_cy.groupby("Customer Name Index")['Sales'].sum().nsmallest(5)
last_clients_py = df_py.groupby("Customer Name Index")['Sales'].sum()

# One frame with a CY and a PY column, in the CY ranking order. A customer
# with no PY sales gets 0. Plotting the two Series separately draws both sets
# of bars at the same y positions, so they overlap; a two-column DataFrame is
# drawn as grouped bars.
last_clients_yoy = last_clients.to_frame("CY")
last_clients_yoy["PY"] = last_clients_py.reindex(last_clients.index).fillna(0)

fig, ax = plt.subplots(figsize=(12,6))
last_clients_yoy.plot(kind='barh', ax=ax)

ax.set_title("Last 5 Customers – CY vs PY")
ax.set_xlabel("Sales")
ax.legend()

plt.tight_layout()
plt.savefig(OUTPUT_DIR / "last5_customers_yoy.png", dpi=300)
plt.show()

print("\n✅ NOTEBOOK 4 – VISUALISATIONS TERMINÉ AVEC SUCCÈS")
