# In [None]:
# ============================================================
# NOTEBOOK 3 : CALCUL DES MESURES & KPI
# Cours Python pour le Data Science – Application de synthèse
# ============================================================

# ------------------------------------------------------------
# SECTION 1 : IMPORTS ET CHARGEMENT DES DONNÉES
# ------------------------------------------------------------

import pandas as pd
import numpy as np

# Load the cleaned sales data written by Notebook 2 and parse the order
# dates explicitly, so the .dt accessors used further down work on a
# datetime column.
df_sales = pd.read_csv('data/processed/sales_clean.csv').assign(
    OrderDate=lambda frame: pd.to_datetime(frame['OrderDate'])
)

print(f"✓ Données chargées : {df_sales.shape}")


# ------------------------------------------------------------
# SECTION 2: BUILDING THE DATE TABLE (DATE DIMENSION)
# ------------------------------------------------------------
# Course requirements:
# - Unique dates
# - No null values
# - Full coverage of the period
# - Complete set of calendar columns

# normalize=True snaps both bounds to midnight before the range is built.
# Without it, an OrderDate carrying a time-of-day component would shift
# every generated date to that time and could leave the last order day
# out of the table, breaking the "full coverage" requirement.
date_range = pd.date_range(
    start=df_sales['OrderDate'].min(),
    end=df_sales['OrderDate'].max(),
    freq='D',
    normalize=True
)

date_table = pd.DataFrame({'Date': date_range})

# Calendar attributes derived from the date itself.
date_table['Year'] = date_table['Date'].dt.year
date_table['Quarter'] = date_table['Date'].dt.quarter
date_table['Month'] = date_table['Date'].dt.month
date_table['Month_Name'] = date_table['Date'].dt.strftime('%B')
date_table['Month_Short_Name'] = date_table['Date'].dt.strftime('%b')
date_table['Day'] = date_table['Date'].dt.day
date_table['Day_Name'] = date_table['Date'].dt.strftime('%A')
# dayofweek is 0 for Monday through 6 for Sunday.
date_table['Day_of_Week'] = date_table['Date'].dt.dayofweek
date_table['Week_of_Year'] = date_table['Date'].dt.isocalendar().week
date_table['Day_of_Year'] = date_table['Date'].dt.dayofyear
# 5 and 6 are Saturday and Sunday in the dayofweek numbering above.
date_table['Is_Weekend'] = date_table['Day_of_Week'].isin([5, 6])

date_table.to_csv('data/processed/date_table.csv', index=False)

print(f"✓ Table de dates créée : {date_table.shape}")


# ------------------------------------------------------------
# SECTION 3: GLOBAL MEASURES (DAX EQUIVALENT)
# ------------------------------------------------------------
# Total Sales = SUM(Sales)
# Total Profit = SUM(Profit)
# Total Order Quantity = SUM(Order Quantity)
# Profit Margin % = Total Profit / Total Sales

# One column total per measure, unpacked in the same order as the
# column names listed here.
total_sales, total_profit, total_order_quantity, total_cost = (
    df_sales[column].sum()
    for column in ('Sales', 'Profit', 'Order Quantity', 'Cost')
)

# Guard against a zero denominator: with no sales the margin is reported
# as 0 rather than raising or producing inf/NaN.
if total_sales == 0:
    profit_margin_pct = 0
else:
    profit_margin_pct = total_profit / total_sales * 100

print("\n=== MESURES GLOBALES ===")
print(f"Total Sales : {total_sales:,.2f}")
print(f"Total Profit : {total_profit:,.2f}")
print(f"Total Order Quantity : {total_order_quantity:,.0f}")
print(f"Total Cost : {total_cost:,.2f}")
print(f"Profit Margin % : {profit_margin_pct:.2f}%")


# ------------------------------------------------------------
# SECTION 4: MEASURES BY YEAR
# ------------------------------------------------------------

# Output column name -> (source column, aggregation). The 'Year' column
# is expected to already exist in the loaded sales data.
yearly_aggregations = {
    'Total_Sales': ('Sales', 'sum'),
    'Total_Profit': ('Profit', 'sum'),
    'Total_Order_Quantity': ('Order Quantity', 'sum'),
    'Total_Cost': ('Cost', 'sum'),
}

measures_by_year = (
    df_sales
    .groupby('Year')
    .agg(**yearly_aggregations)
    .reset_index()
)

# Yearly profit margin, expressed as a percentage of yearly sales.
measures_by_year['Profit_Margin_%'] = (
    measures_by_year['Total_Profit']
    .div(measures_by_year['Total_Sales'])
    .mul(100)
)

measures_by_year.to_csv(
    'data/processed/measures_by_year.csv', index=False
)

print("\n=== MESURES PAR ANNÉE ===")
display(measures_by_year)


# ------------------------------------------------------------
# SECTION 5: YEAR-OVER-YEAR (YOY) MEASURES
# ------------------------------------------------------------
# DAX equivalent:
# SAMEPERIODLASTYEAR(Date)

df_sales['Month'] = df_sales['OrderDate'].dt.month

monthly_measures = df_sales.groupby(['Year', 'Month']).agg(
    Sales=('Sales', 'sum'),
    Profit=('Profit', 'sum'),
    Order_Quantity=('Order Quantity', 'sum')
).reset_index()

# Prior-year values: relabel each (Year, Month) row as Year + 1 and join
# it back on (Year, Month). Unlike a positional shift within each month,
# this only pairs a row with the *immediately preceding* year; when that
# year has no data for the month, the *_PY columns stay NaN instead of
# silently picking up an older year.
prior_year = (
    monthly_measures[['Year', 'Month', 'Sales', 'Profit', 'Order_Quantity']]
    .rename(columns={
        'Sales': 'Sales_PY',
        'Profit': 'Profit_PY',
        'Order_Quantity': 'Order_Quantity_PY',
    })
    .assign(Year=lambda frame: frame['Year'] + 1)
)
monthly_measures = monthly_measures.merge(
    prior_year, on=['Year', 'Month'], how='left'
)

# Year-over-year variance, absolute and relative. The percentage is
# taken against the PRIOR-year value (the baseline), not the current
# one. Rows without a prior year, or with a zero prior-year value (which
# would give +/-inf), are reported as 0.
monthly_measures['Sales_PY_Var'] = (
    monthly_measures['Sales'] - monthly_measures['Sales_PY']
)
monthly_measures['Sales_PY_Var_%'] = (
    monthly_measures['Sales_PY_Var'] / monthly_measures['Sales_PY'] * 100
).replace([np.inf, -np.inf], np.nan).fillna(0)

monthly_measures['Profit_PY_Var'] = (
    monthly_measures['Profit'] - monthly_measures['Profit_PY']
)
monthly_measures['Profit_PY_Var_%'] = (
    monthly_measures['Profit_PY_Var'] / monthly_measures['Profit_PY'] * 100
).replace([np.inf, -np.inf], np.nan).fillna(0)

# Monthly profit margin as a percentage of monthly sales.
monthly_measures['Profit_Margin_%'] = (
    monthly_measures['Profit'] / monthly_measures['Sales'] * 100
).fillna(0)

monthly_measures.to_csv(
    'data/processed/measures_monthly_yoy.csv', index=False
)

print("\n=== MESURES YEAR-OVER-YEAR (YOY) ===")
display(monthly_measures.tail(12))


# ------------------------------------------------------------
# SECTION 6: MEASURES BY PRODUCT (YOY)
# ------------------------------------------------------------

product_measures = df_sales.groupby(
    ['Product Description Index', 'Year']
).agg(
    Sales=('Sales', 'sum'),
    Profit=('Profit', 'sum'),
    Order_Quantity=('Order Quantity', 'sum')
).reset_index()

# Prior-year sales per product: relabel each (product, Year) row as
# Year + 1 and join it back. A positional shift within each product
# would pair a row with the previous *available* year, which is wrong
# whenever a product has no sales in the year in between; with the join,
# Sales_PY stays NaN in that case.
prior_year_sales = (
    product_measures[['Product Description Index', 'Year', 'Sales']]
    .rename(columns={'Sales': 'Sales_PY'})
    .assign(Year=lambda frame: frame['Year'] + 1)
)
product_measures = product_measures.merge(
    prior_year_sales,
    on=['Product Description Index', 'Year'],
    how='left'
)

product_measures['Sales_PY_Var'] = (
    product_measures['Sales'] - product_measures['Sales_PY']
)

# Relative variance is measured against the PRIOR-year sales (the
# baseline). Rows without a prior year, or with zero prior-year sales
# (which would give +/-inf), are reported as 0.
product_measures['Sales_PY_Var_%'] = (
    product_measures['Sales_PY_Var'] /
    product_measures['Sales_PY'] * 100
).replace([np.inf, -np.inf], np.nan).fillna(0)

product_measures.to_csv(
    'data/processed/measures_by_product.csv', index=False
)

print("\n=== MESURES PAR PRODUIT ===")
display(product_measures.head())


# ------------------------------------------------------------
# SECTION 7: KPI SUMMARY TABLE
# ------------------------------------------------------------

# One (label, formatted value) pair per global KPI. Values are stored as
# display strings, so the exported CSV holds formatted text, not numbers.
kpi_rows = [
    ('Total Sales', f"{total_sales:,.2f}"),
    ('Total Profit', f"{total_profit:,.2f}"),
    ('Total Order Quantity', f"{total_order_quantity:,.0f}"),
    ('Total Cost', f"{total_cost:,.2f}"),
    ('Profit Margin %', f"{profit_margin_pct:.2f}%"),
]
measures_summary = pd.DataFrame(kpi_rows, columns=['Measure', 'Value'])

measures_summary.to_csv(
    'data/processed/measures_summary.csv', index=False
)

print("\n=== RÉCAPITULATIF DES KPI ===")
display(measures_summary)

print("\n✅ NOTEBOOK 3 – CALCUL DES MESURES TERMINÉ AVEC SUCCÈS")
