In [37]:
# ============================================================================
# NOTEBOOK 3 : ANALYSE EXPLORATOIRE DES DONN√âES E-COMMERCE
# ============================================================================
# Objectif : D√©couvrir les patterns, tendances et insights cach√©s
# Focus : Visualisations actionables pour le business

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Configuration des graphiques
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
pd.set_option('display.float_format', '{:.2f}'.format)

print("üéØ NOTEBOOK 3 : ANALYSE EXPLORATOIRE")
print("=" * 40)
print("üìä D√©couverte des patterns business")
print("üîç Insights actionnables")
print("üìà Visualisations interactives")


üéØ NOTEBOOK 3 : ANALYSE EXPLORATOIRE
üìä D√©couverte des patterns business
üîç Insights actionnables
üìà Visualisations interactives


In [38]:
# ============================================================================
# üìä CHARGEMENT ET APER√áU DES DONN√âES NETTOY√âES
# ============================================================================

# Charger les donn√©es nettoy√©es
df = pd.read_pickle('C:/Users/Moi/E-commerce_Marketing_Analytics/data/processed/cleaned_ecommerce_data.pkl')

# Reconversion des types (n√©cessaire apr√®s CSV)
#df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
#df['Customer ID'] = df['Customer ID'].astype(str)

print("üìä APER√áU DES DONN√âES NETTOY√âES")
print("=" * 35)
print(f"üìè Dimensions: {df.shape}")
print(f"üìÖ P√©riode: {df['InvoiceDate'].min()} ‚Üí {df['InvoiceDate'].max()}")
print(f"üåç Pays: {df['Country'].nunique()}")
print(f"üë• Clients: {df['Customer ID'].nunique():,}")
print(f"üì¶ Produits: {df['StockCode'].nunique():,}")

# Aper√ßu des types de donn√©es
print("\nüîß TYPES DE DONN√âES :")
print(df.dtypes)
print("\nüîß TYPES DE DONN√âES FINAUX :")
for col in df.columns:
    print(f"  ‚Ä¢ {col}: {df[col].dtype}")


üìä APER√áU DES DONN√âES NETTOY√âES
üìè Dimensions: (1040892, 13)
üìÖ P√©riode: 2009-12-01 07:45:00 ‚Üí 2011-12-09 12:50:00
üåç Pays: 43
üë• Clients: 248,949
üì¶ Produits: 5,305

üîß TYPES DE DONN√âES :
Invoice                     object
StockCode                   object
Description                 object
Quantity                     int64
InvoiceDate         datetime64[ns]
Price                      float64
Customer ID                 object
Country                     object
Transaction_Type            object
Total_Amount               float64
Suspicious                    bool
Price_Outlier                 bool
Quantity_Outlier              bool
dtype: object

üîß TYPES DE DONN√âES FINAUX :
  ‚Ä¢ Invoice: object
  ‚Ä¢ StockCode: object
  ‚Ä¢ Description: object
  ‚Ä¢ Quantity: int64
  ‚Ä¢ InvoiceDate: datetime64[ns]
  ‚Ä¢ Price: float64
  ‚Ä¢ Customer ID: object
  ‚Ä¢ Country: object
  ‚Ä¢ Transaction_Type: object
  ‚Ä¢ Total_Amount: float64
  ‚Ä¢ Suspicious: bool
  ‚Ä¢ Pri

In [39]:
# ============================================================================
# üí∞ ANALYSE FINANCI√àRE GLOBALE
# ============================================================================

def analyze_financial_performance(df):
    """
    Analyse compl√®te des performances financi√®res
    """
    print("üí∞ ANALYSE FINANCI√àRE GLOBALE")
    print("=" * 30)
    
    # M√©triques globales
    sales_data = df[df['Transaction_Type'] == 'SALE']
    returns_data = df[df['Transaction_Type'] == 'RETURN']
    
    total_revenue = sales_data['Total_Amount'].sum()
    total_returns = abs(returns_data['Total_Amount'].sum())
    net_revenue = total_revenue - total_returns
    
    print(f"üíµ Chiffre d'affaires brut: ${total_revenue:,.2f}")
    print(f"‚Ü©Ô∏è  Montant des retours: ${total_returns:,.2f}")
    print(f"üíé Chiffre d'affaires net: ${net_revenue:,.2f}")
    print(f"üìä Taux de retour: {(total_returns/total_revenue)*100:.1f}%")
    
    # M√©triques par transaction
    avg_order_value = sales_data['Total_Amount'].mean()
    median_order_value = sales_data['Total_Amount'].median()
    
    print(f"\nüìà M√âTRIQUES PAR COMMANDE :")
    print(f"üí∞ Panier moyen: ${avg_order_value:.2f}")
    print(f"üìä Panier m√©dian: ${median_order_value:.2f}")
    print(f"üõí Nombre total de commandes: {len(sales_data):,}")
    
    # Analyse des articles
    total_items_sold = sales_data['Quantity'].sum()
    avg_items_per_order = sales_data['Quantity'].mean()
    
    print(f"\nüì¶ ANALYSE DES ARTICLES :")
    print(f"üìä Articles vendus: {total_items_sold:,}")
    print(f"üõí Articles par commande: {avg_items_per_order:.1f}")
    
    return {
        'total_revenue': total_revenue,
        'net_revenue': net_revenue,
        'return_rate': (total_returns/total_revenue)*100,
        'avg_order_value': avg_order_value,
        'total_orders': len(sales_data)
    }

# Ex√©cution de l'analyse financi√®re
financial_metrics = analyze_financial_performance(df)


üí∞ ANALYSE FINANCI√àRE GLOBALE
üíµ Chiffre d'affaires brut: $20,445,293.52
‚Ü©Ô∏è  Montant des retours: $1,516,344.05
üíé Chiffre d'affaires net: $18,928,949.47
üìä Taux de retour: 7.4%

üìà M√âTRIQUES PAR COMMANDE :
üí∞ Panier moyen: $20.01
üìä Panier m√©dian: $10.00
üõí Nombre total de commandes: 1,021,752

üì¶ ANALYSE DES ARTICLES :
üìä Articles vendus: 10,907,268
üõí Articles par commande: 10.7


In [40]:
# ============================================================================
# üìÖ ANALYSE TEMPORELLE DES VENTES
# ============================================================================

def analyze_temporal_patterns(df):
    """
    Analyse des patterns temporels des ventes
    """
    print("\nüìÖ ANALYSE TEMPORELLE DES VENTES")
    print("=" * 30)
    
    sales_data = df[df['Transaction_Type'] == 'SALE'].copy()
    
    # Extraire les composantes temporelles
    sales_data['Date'] = sales_data['InvoiceDate'].dt.date
    sales_data['Hour'] = sales_data['InvoiceDate'].dt.hour
    sales_data['DayOfWeek'] = sales_data['InvoiceDate'].dt.day_name()
    sales_data['Month'] = sales_data['InvoiceDate'].dt.month
    sales_data['MonthName'] = sales_data['InvoiceDate'].dt.month_name()
    
    # Agr√©gations
    daily_sales = sales_data.groupby('Date').agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    hourly_sales = sales_data.groupby('Hour')['Total_Amount'].sum().reset_index()
    
    monthly_sales = sales_data.groupby('MonthName').agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique'
    }).reset_index()
    
    dow_sales = sales_data.groupby('DayOfWeek')['Total_Amount'].sum().reset_index()
    
    print(f"üìä P√©riode d'analyse: {sales_data['Date'].min()} ‚Üí {sales_data['Date'].max()}")
    print(f"üìà Ventes quotidiennes moyennes: ${daily_sales['Total_Amount'].mean():,.2f}")
    
    # üîß LIGNE CORRIG√âE - Remplacez cette ligne :
    # print(f"üïê Heure de pointe: {hourly_sales.loc[hourly_sales['Total_Amount'].idxmax(), 'Hour']}h")
    
    # üîß PAR CETTE LIGNE CORRIG√âE :
    if not hourly_sales.empty and hourly_sales['Total_Amount'].sum() > 0:
        print(f"üïê Heure de pointe: {hourly_sales.loc[hourly_sales['Total_Amount'].idxmax(), 'Hour']}h")
    else:
        print("üïê Heure de pointe: Aucune donn√©e horaire disponible")
    
    return {
        'daily_sales': daily_sales,
        'hourly_sales': hourly_sales,
        'monthly_sales': monthly_sales,
        'dow_sales': dow_sales
    }

# Ex√©cution de l'analyse temporelle
temporal_data = analyze_temporal_patterns(df)



üìÖ ANALYSE TEMPORELLE DES VENTES
üìä P√©riode d'analyse: 2009-12-01 ‚Üí 2011-12-09
üìà Ventes quotidiennes moyennes: $33,849.82
üïê Heure de pointe: 12h


In [41]:
# ============================================================================
# üîç DIAGNOSTIC DES DONN√âES
# ============================================================================

def diagnose_data_issues(df):
    """
    Diagnostic pour identifier les probl√®mes dans les donn√©es
    """
    print("\nüîç DIAGNOSTIC DES DONN√âES")
    print("=" * 40)
    
    # V√©rifications de base
    print(f"üìä Forme du DataFrame: {df.shape}")
    print(f"üìã Colonnes: {list(df.columns)}")
    print(f"üî¢ Lignes totales: {len(df):,}")
    
    # V√©rification Transaction_Type
    if 'Transaction_Type' in df.columns:
        print(f"\nüìà TRANSACTION_TYPE:")
        print(df['Transaction_Type'].value_counts())
        sales_count = (df['Transaction_Type'] == 'SALE').sum()
        print(f"   ‚Üí Nombre de SALES: {sales_count}")
    else:
        print("‚ùå Colonne 'Transaction_Type' manquante!")
    
    # V√©rification InvoiceDate
    if 'InvoiceDate' in df.columns:
        print(f"\nüìÖ INVOICEDATE:")
        print(f"   ‚Üí Type: {df['InvoiceDate'].dtype}")
        print(f"   ‚Üí Valeurs nulles: {df['InvoiceDate'].isnull().sum()}")
        print(f"   ‚Üí Premi√®res valeurs: {df['InvoiceDate'].head().tolist()}")
    else:
        print("‚ùå Colonne 'InvoiceDate' manquante!")
    
    # V√©rification Total_Amount
    if 'Total_Amount' in df.columns:
        print(f"\nüí∞ TOTAL_AMOUNT:")
        print(f"   ‚Üí Type: {df['Total_Amount'].dtype}")
        print(f"   ‚Üí Valeurs nulles: {df['Total_Amount'].isnull().sum()}")
        print(f"   ‚Üí Min: {df['Total_Amount'].min()}")
        print(f"   ‚Üí Max: {df['Total_Amount'].max()}")
        print(f"   ‚Üí Moyenne: {df['Total_Amount'].mean()}")
    else:
        print("‚ùå Colonne 'Total_Amount' manquante!")

# Ex√©cutez ce diagnostic
diagnose_data_issues(df)



üîç DIAGNOSTIC DES DONN√âES
üìä Forme du DataFrame: (1040892, 13)
üìã Colonnes: ['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'Price', 'Customer ID', 'Country', 'Transaction_Type', 'Total_Amount', 'Suspicious', 'Price_Outlier', 'Quantity_Outlier']
üî¢ Lignes totales: 1,040,892

üìà TRANSACTION_TYPE:
Transaction_Type
SALE      1021752
RETURN      19140
Name: count, dtype: int64
   ‚Üí Nombre de SALES: 1021752

üìÖ INVOICEDATE:
   ‚Üí Type: datetime64[ns]
   ‚Üí Valeurs nulles: 0
   ‚Üí Premi√®res valeurs: [Timestamp('2009-12-01 07:45:00'), Timestamp('2009-12-01 07:45:00'), Timestamp('2009-12-01 07:45:00'), Timestamp('2009-12-01 07:45:00'), Timestamp('2009-12-01 07:45:00')]

üí∞ TOTAL_AMOUNT:
   ‚Üí Type: float64
   ‚Üí Valeurs nulles: 0
   ‚Üí Min: -168469.6
   ‚Üí Max: 168469.6
   ‚Üí Moyenne: 18.185315544744313


In [42]:
# ============================================================================
# üé® VISUALISATIONS FINANCI√àRES
# ============================================================================

def create_financial_dashboard(df, temporal_data):
    """
    Cr√©e un dashboard financier interactif
    """
    print("\nüé® CR√âATION DU DASHBOARD FINANCIER")
    print("=" * 35)
    
    # Configuration des subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('√âvolution des Ventes Quotidiennes', 'Ventes par Heure',
                       'Ventes par Jour de la Semaine', 'Ventes par Mois'),
        specs=[[{'secondary_y': True}, {'type': 'bar'}],
               [{'type': 'bar'}, {'type': 'bar'}]]
    )
    
    # 1. √âvolution quotidienne
    fig.add_trace(
        go.Scatter(x=temporal_data['daily_sales']['Date'], 
                  y=temporal_data['daily_sales']['Total_Amount'],
                  name='Ventes Quotidiennes',
                  line=dict(color='blue', width=2)),
        row=1, col=1
    )
    
    # 2. Ventes par heure
    fig.add_trace(
        go.Bar(x=temporal_data['hourly_sales']['Hour'],
               y=temporal_data['hourly_sales']['Total_Amount'],
               name='Ventes par Heure',
               marker_color='lightblue'),
        row=1, col=2
    )
    
    # 3. Ventes par jour de la semaine
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    dow_ordered = temporal_data['dow_sales'].set_index('DayOfWeek').reindex(day_order).reset_index()
    
    fig.add_trace(
        go.Bar(x=dow_ordered['DayOfWeek'],
               y=dow_ordered['Total_Amount'],
               name='Ventes par Jour',
               marker_color='lightgreen'),
        row=2, col=1
    )
    
    # 4. Ventes par mois
    fig.add_trace(
        go.Bar(x=temporal_data['monthly_sales']['MonthName'],
               y=temporal_data['monthly_sales']['Total_Amount'],
               name='Ventes par Mois',
               marker_color='lightcoral'),
        row=2, col=2
    )
    
    # Mise √† jour du layout
    fig.update_layout(
        title_text="üìä DASHBOARD FINANCIER E-COMMERCE",
        title_x=0.5,
        height=800,
        showlegend=False
    )
    
    pio.renderers.default='browser'
    fig.show()
    
    print("‚úÖ Dashboard financier cr√©√©")

# Cr√©ation du dashboard
create_financial_dashboard(df, temporal_data)



üé® CR√âATION DU DASHBOARD FINANCIER
‚úÖ Dashboard financier cr√©√©


In [44]:
# ============================================================================
# üë• ANALYSE DES CLIENTS
# ============================================================================

def analyze_customer_behavior(df):
    """
    Analyse approfondie du comportement client
    """
    print("\nüë• ANALYSE DU COMPORTEMENT CLIENT")
    print("=" * 35)
    
    sales_data = df[df['Transaction_Type'] == 'SALE'].copy()
    
    # M√©triques par client
    customer_metrics = sales_data.groupby('Customer ID').agg({
        'Total_Amount': ['sum', 'count', 'mean'],
        'Invoice': 'nunique',
        'InvoiceDate': ['min', 'max'],
        'Quantity': 'sum'
    }).round(2)
    
    customer_metrics.columns = ['Total_Spent', 'Total_Orders', 'Avg_Order_Value', 
                               'Unique_Invoices', 'First_Purchase', 'Last_Purchase', 'Items_Bought']
    
    # P√©riode d'activit√©
    customer_metrics['Days_Active'] = (customer_metrics['Last_Purchase'] - customer_metrics['First_Purchase']).dt.days
    
    # Segmentation simple
    customer_metrics['Segment'] = pd.cut(customer_metrics['Total_Spent'], 
                                       bins=[0, 100, 500, 1000, float('inf')],
                                       labels=['Bronze', 'Silver', 'Gold', 'Platinum'])
    
    # Distinguer les clients invit√©s
    customer_metrics['Customer_Type'] = customer_metrics.index.to_series().apply(
        lambda x: 'Guest' if x.startswith('GUEST_') else 'Registered'
    )
    
    print(f"üìä Total clients: {len(customer_metrics):,}")
    print(f"üë§ Clients enregistr√©s: {(customer_metrics['Customer_Type'] == 'Registered').sum():,}")
    print(f"üé≠ Clients invit√©s: {(customer_metrics['Customer_Type'] == 'Guest').sum():,}")
    
    print(f"\nüí∞ R√âPARTITION PAR SEGMENT :")
    segment_dist = customer_metrics['Segment'].value_counts()
    for segment, count in segment_dist.items():
        pct = (count / len(customer_metrics)) * 100
        print(f"  {segment}: {count:,} clients ({pct:.1f}%)")
    
    print(f"\nüèÜ TOP CLIENTS :")
    top_customers = customer_metrics.nlargest(5, 'Total_Spent')
    for idx, (customer_id, data) in enumerate(top_customers.iterrows(), 1):
        print(f"  {idx}. Client {customer_id}: ${data['Total_Spent']:,.2f} ({data['Total_Orders']} commandes)")
    
    return customer_metrics

# Ex√©cution de l'analyse client
customer_data = analyze_customer_behavior(df)



üë• ANALYSE DU COMPORTEMENT CLIENT
üìä Total clients: 248,138
üë§ Clients enregistr√©s: 5,881
üé≠ Clients invit√©s: 242,257

üí∞ R√âPARTITION PAR SEGMENT :
  Bronze: 233,014 clients (93.9%)
  Silver: 4,705 clients (1.9%)
  Platinum: 2,845 clients (1.1%)
  Gold: 1,435 clients (0.6%)

üèÜ TOP CLIENTS :
  1. Client 18102: $580,987.04 (1040 commandes)
  2. Client 14646: $528,602.52 (3854 commandes)
  3. Client 14156: $313,437.62 (4038 commandes)
  4. Client 14911: $291,420.81 (11079 commandes)
  5. Client 17450: $244,784.25 (421 commandes)


In [46]:
# ============================================================================
# üì¶ ANALYSE DES PRODUITS
# ============================================================================

def analyze_product_performance(df):
    """
    Analyse des performances produits
    """
    print("\nüì¶ ANALYSE DES PERFORMANCES PRODUITS")
    print("=" * 40)
    
    sales_data = df[df['Transaction_Type'] == 'SALE'].copy()
    
    # M√©triques par produit
    product_metrics = sales_data.groupby(['StockCode', 'Description']).agg({
        'Quantity': 'sum',
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    product_metrics.columns = ['StockCode', 'Description', 'Total_Quantity', 
                              'Total_Revenue', 'Unique_Orders', 'Unique_Customers']
    
    # Calcul du prix moyen
    product_metrics['Avg_Price'] = product_metrics['Total_Revenue'] / product_metrics['Total_Quantity']
    
    # Tri par revenus
    product_metrics = product_metrics.sort_values('Total_Revenue', ascending=False)
    
    print(f"üìä Produits uniques: {len(product_metrics):,}")
    print(f"üí∞ Revenus totaux: ${product_metrics['Total_Revenue'].sum():,.2f}")
    
    print(f"\nüèÜ TOP 10 PRODUITS (Revenus) :")
    top_products = product_metrics.head(10)
    for idx, row in top_products.iterrows():
        desc = row['Description'][:50] + '...' if pd.notna(row['Description']) and len(str(row['Description'])) > 50 else row['Description']
        print(f"  {row['StockCode']}: ${row['Total_Revenue']:,.2f} | {desc}")
    
    print(f"\nüî• TOP 10 PRODUITS (Quantit√©) :")
    top_qty = product_metrics.nlargest(10, 'Total_Quantity')
    for idx, row in top_qty.iterrows():
        desc = row['Description'][:50] + '...' if pd.notna(row['Description']) and len(str(row['Description'])) > 50 else row['Description']
        print(f"  {row['StockCode']}: {row['Total_Quantity']:,} unit√©s | {desc}")
    
    return product_metrics

# Ex√©cution de l'analyse produit
product_data = analyze_product_performance(df)



üì¶ ANALYSE DES PERFORMANCES PRODUITS
üìä Produits uniques: 6,554
üí∞ Revenus totaux: $20,445,293.52

üèÜ TOP 10 PRODUITS (Revenus) :
  M: $340,153.38 | Manual
  22423: $335,733.20 | REGENCY CAKESTAND 3 TIER
  DOT: $322,657.48 | DOTCOM POSTAGE
  85123A: $257,906.71 | WHITE HANGING HEART T-LIGHT HOLDER
  23843: $168,469.60 | PAPER CRAFT , LITTLE BIRDIE
  47566: $148,590.20 | PARTY BUNTING
  85099B: $146,151.28 | JUMBO BAG RED RETROSPOT
  84879: $129,465.61 | ASSORTED COLOUR BIRD ORNAMENT
  POST: $125,682.42 | POSTAGE
  22086: $120,145.39 | PAPER CHAIN KIT 50'S CHRISTMAS 

üî• TOP 10 PRODUITS (Quantit√©) :
  84077: 106,265 unit√©s | WORLD WAR 2 GLIDERS ASSTD DESIGNS
  85123A: 94,208 unit√©s | WHITE HANGING HEART T-LIGHT HOLDER
  23843: 80,995 unit√©s | PAPER CRAFT , LITTLE BIRDIE
  84879: 80,138 unit√©s | ASSORTED COLOUR BIRD ORNAMENT
  23166: 78,033 unit√©s | MEDIUM CERAMIC TOP STORAGE JAR
  85099B: 77,331 unit√©s | JUMBO BAG RED RETROSPOT
  17003: 70,393 unit√©s | BROCADE RING PU

In [None]:
# ============================================================================
# üåç ANALYSE G√âOGRAPHIQUE
# ============================================================================

def analyze_geographic_performance(df):
    """
    Analyse des performances par pays
    """
    print("\nüåç ANALYSE G√âOGRAPHIQUE")
    
    print("=" * 25)
    
    sales_data = df[df['Transaction_Type'] == 'SALE'].copy()
    
    # M√©triques par pays
    country_metrics = sales_data.groupby('Country').agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique',
        'Quantity': 'sum'
    }).reset_index()
    
    country_metrics.columns = ['Country', 'Total_Revenue', 'Total_Orders', 'Unique_Customers', 'Items_Sold']
    
    # Calculs additionnels
    country_metrics['Avg_Order_Value'] = country_metrics['Total_Revenue'] / country_metrics['Total_Orders']
    country_metrics['Revenue_per_Customer'] = country_metrics['Total_Revenue'] / country_metrics['Unique_Customers']
    
    # Tri par revenus
    country_metrics = country_metrics.sort_values('Total_Revenue', ascending=False)
    
    print(f"üåé Pays actifs: {len(country_metrics)}")
    print(f"üèÜ Top pays: {country_metrics.iloc[0]['Country']}")
    
    print(f"\nüí∞ TOP 10 PAYS (Revenus) :")
    top_countries = country_metrics.head(10)
    for idx, row in top_countries.iterrows():
        pct = (row['Total_Revenue'] / country_metrics['Total_Revenue'].sum()) * 100
        print(f"  {row['Country']}: ${row['Total_Revenue']:,.2f} ({pct:.1f}%)")
    
    print(f"\nüë• TOP 10 PAYS (Clients) :")
    top_customers_countries = country_metrics.nlargest(10, 'Unique_Customers')
    for idx, row in top_customers_countries.iterrows():
        print(f"  {row['Country']}: {row['Unique_Customers']:,} clients")
    
    return country_metrics

# Ex√©cution de l'analyse g√©ographique
geographic_data = analyze_geographic_performance(df)



üåç ANALYSE G√âOGRAPHIQUE
üåé Pays actifs: 43
üèÜ Top pays: United Kingdom

üí∞ TOP 10 PAYS (Revenus) :
  United Kingdom: $17,378,389.04 (85.0%)
  EIRE: $659,371.21 (3.2%)
  Netherlands: $554,038.09 (2.7%)
  Germany: $425,019.71 (2.1%)
  France: $350,456.09 (1.7%)
  Australia: $169,283.46 (0.8%)
  Spain: $108,332.49 (0.5%)
  Switzerland: $100,707.89 (0.5%)
  Sweden: $91,869.82 (0.4%)
  Denmark: $68,580.69 (0.3%)

üë• TOP 10 PAYS (Clients) :
  United Kingdom: 244,737 clients
  EIRE: 1,614 clients
  Hong Kong: 358 clients
  Unspecified: 237 clients
  France: 223 clients
  Switzerland: 147 clients
  Portugal: 140 clients
  Germany: 107 clients
  United Arab Emirates: 88 clients
  Bahrain: 67 clients


In [47]:
# ============================================================================
# üìà VISUALISATIONS INTERACTIVES - PERFORMANCES BUSINESS
# ============================================================================

def create_business_dashboard(df):
    """
    Dashboard interactif des performances business
    """
    print("üìä CR√âATION DU DASHBOARD BUSINESS")
    print("=" * 35)
    
    # Pr√©paration des donn√©es pour les graphiques
    monthly_sales = df.groupby(df['InvoiceDate'].dt.to_period('M')).agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    monthly_sales['InvoiceDate'] = monthly_sales['InvoiceDate'].dt.to_timestamp()
    
    # 1. GRAPHIQUE 1 : √âvolution mensuelle des ventes
    fig1 = go.Figure()
    
    fig1.add_trace(go.Scatter(
        x=monthly_sales['InvoiceDate'],
        y=monthly_sales['Total_Amount'],
        mode='lines+markers',
        name='Revenus Mensuels',
        line=dict(color='#1f77b4', width=3),
        marker=dict(size=8)
    ))
    
    fig1.update_layout(
        title='üìà √âVOLUTION DES REVENUS MENSUELS',
        xaxis_title='Mois',
        yaxis_title='Revenus ( $ )',
        template='plotly_white',
        height=500
    )
    
    fig1.show()
    
    # 2. GRAPHIQUE 2 : Top 10 pays par revenus
    top_countries = df.groupby('Country')['Total_Amount'].sum().nlargest(10).reset_index()
    
    fig2 = px.bar(
        top_countries,
        x='Total_Amount',
        y='Country',
        orientation='h',
        title='üåç TOP 10 PAYS PAR REVENUS',
        labels={'Total_Amount': 'Revenus ( $ )', 'Country': 'Pays'}
    )
    
    fig2.update_layout(
        template='plotly_white',
        height=500,
        yaxis={'categoryorder': 'total ascending'}
    )
    
    fig2.show()
    
    # 3. GRAPHIQUE 3 : Heatmap des ventes par jour/heure
    sales_heatmap = df.pivot_table(
        values='Total_Amount',
        index=df['InvoiceDate'].dt.day_name(),
        columns=df['InvoiceDate'].dt.hour,
        aggfunc='sum'
    )
    
    # R√©organiser les jours de la semaine
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    sales_heatmap = sales_heatmap.reindex(day_order)
    
    fig3 = px.imshow(
        sales_heatmap,
        title='üïê HEATMAP VENTES PAR JOUR/HEURE',
        labels=dict(x="Heure", y="Jour", color="Revenus ($)"),
        aspect="auto"
    )
    
    fig3.update_layout(
        template='plotly_white',
        height=400
    )
    
    fig3.show()
    
    return fig1, fig2, fig3

# Cr√©ation du dashboard
dashboard_figs = create_business_dashboard(df)


üìä CR√âATION DU DASHBOARD BUSINESS


In [28]:
# ============================================================================
# üéØ ANALYSE DES PATTERNS DE COMPORTEMENT CLIENT
# ============================================================================

def analyze_customer_behavior(df):
    """
    Analyse approfondie du comportement client
    """
    print("üë• ANALYSE COMPORTEMENT CLIENT")
    print("=" * 35)
    
    # M√©triques par client
    customer_metrics = df.groupby('Customer ID').agg({
        'Total_Amount': ['sum', 'mean', 'count'],
        'Quantity': 'sum',
        'Invoice': 'nunique',
        'InvoiceDate': ['min', 'max']
    }).reset_index()
    
    # Aplatir les colonnes
    customer_metrics.columns = ['Customer_ID', 'Total_Revenue', 'Avg_Order_Value', 
                               'Total_Orders', 'Total_Items', 'Unique_Invoices', 
                               'First_Purchase', 'Last_Purchase']
    
    # Calculs additionnels
    customer_metrics['Customer_Lifetime_Days'] = (
        customer_metrics['Last_Purchase'] - customer_metrics['First_Purchase']
    ).dt.days
    
    customer_metrics['Recency_Days'] = (
        df['InvoiceDate'].max() - customer_metrics['Last_Purchase']
    ).dt.days
    
    # Segmentation simple
    customer_metrics['Revenue_Segment'] = pd.cut(
        customer_metrics['Total_Revenue'],
        bins=[0, 100, 500, 1000, float('inf')],
        labels=['Bronze', 'Silver', 'Gold', 'Platinum']
    )
    
    # Statistiques par segment
    print("üí∞ SEGMENTATION PAR REVENUS :")
    segment_stats = customer_metrics.groupby('Revenue_Segment').agg({
        'Total_Revenue': ['count', 'sum', 'mean'],
        'Avg_Order_Value': 'mean',
        'Total_Orders': 'mean'
    }).round(2)
    
    for segment in ['Bronze', 'Silver', 'Gold', 'Platinum']:
        if segment in segment_stats.index:
            count = segment_stats.loc[segment, ('Total_Revenue', 'count')]
            revenue = segment_stats.loc[segment, ('Total_Revenue', 'sum')]
            avg_revenue = segment_stats.loc[segment, ('Total_Revenue', 'mean')]
            avg_order = segment_stats.loc[segment, ('Avg_Order_Value', 'mean')]
            
            print(f"  üèÜ {segment}: {count:,} clients | "
                  f"${revenue:,.2f} total | "
                  f"${avg_revenue:.2f} moy/client | "
                  f"${avg_order:.2f} moy/commande")
    
    # Graphique de distribution
    fig4 = px.histogram(
        customer_metrics,
        x='Total_Revenue',
        nbins=50,
        title='üìä DISTRIBUTION DES REVENUS PAR CLIENT',
        labels={'Total_Revenue': 'Revenus par Client ($)', 'count': 'Nombre de Clients'}
    )
    
    fig4.update_layout(
        template='plotly_white',
        height=400
    )
    
    fig4.show()
    
    return customer_metrics, fig4

# Analyse comportement client
customer_data, customer_fig = analyze_customer_behavior(df)


üë• ANALYSE COMPORTEMENT CLIENT
üí∞ SEGMENTATION PAR REVENUS :
  üèÜ Bronze: 233,022 clients | $2,084,122.37 total | $8.94 moy/client | $8.91 moy/commande
  üèÜ Silver: 4,710 clients | $1,078,279.34 total | $228.93 moy/client | $128.32 moy/commande
  üèÜ Gold: 1,437 clients | $1,029,402.00 total | $716.35 moy/client | $176.66 moy/commande
  üèÜ Platinum: 2,796 clients | $15,420,363.29 total | $5515.15 moy/client | $166.87 moy/commande


In [29]:
# ============================================================================
# üèÜ ANALYSE DES PRODUITS PERFORMANTS
# ============================================================================

def analyze_product_performance(df):
    """
    Analyse d√©taill√©e des performances produits
    """
    print("üì¶ ANALYSE PERFORMANCE PRODUITS")
    print("=" * 35)
    
    # M√©triques par produit
    product_metrics = df.groupby(['StockCode', 'Description']).agg({
        'Total_Amount': ['sum', 'mean'],
        'Quantity': ['sum', 'mean'],
        'Customer ID': 'nunique',
        'Invoice': 'nunique'
    }).reset_index()
    
    # Aplatir les colonnes
    product_metrics.columns = ['StockCode', 'Description', 'Total_Revenue', 
                              'Avg_Revenue_per_Sale', 'Total_Quantity', 
                              'Avg_Quantity_per_Sale', 'Unique_Customers', 
                              'Unique_Orders']
    
    # Calculs additionnels
    product_metrics['Revenue_per_Customer'] = (
        product_metrics['Total_Revenue'] / product_metrics['Unique_Customers']
    )
    
    product_metrics['Repeat_Purchase_Rate'] = (
        product_metrics['Unique_Orders'] / product_metrics['Unique_Customers']
    )
    
    # Tri par revenus
    product_metrics = product_metrics.sort_values('Total_Revenue', ascending=False)
    
    print("üèÜ TOP 10 PRODUITS (Revenus) :")
    top_products = product_metrics.head(10)
    for idx, row in top_products.iterrows():
        print(f"  üì¶ {row['StockCode']}: ${row['Total_Revenue']:,.2f} | "
              f"{row['Unique_Customers']} clients | "
              f"${row['Revenue_per_Customer']:.2f}/client")
        print(f"      üìù {row['Description'][:50]}...")
    
    print(f"\nüë• TOP 10 PRODUITS (Popularit√©) :")
    popular_products = product_metrics.nlargest(10, 'Unique_Customers')
    for idx, row in popular_products.iterrows():
        print(f"  üì¶ {row['StockCode']}: {row['Unique_Customers']} clients | "
              f"${row['Total_Revenue']:,.2f} revenus")
        print(f"      üìù {row['Description'][:50]}...")
    
    # Graphique de corr√©lation
    fig5 = px.scatter(
        product_metrics.head(100),  # Top 100 pour lisibilit√©
        x='Unique_Customers',
        y='Total_Revenue',
        size='Total_Quantity',
        hover_data=['StockCode', 'Description'],
        title='üìä CORR√âLATION POPULARIT√â vs REVENUS (Top 100)',
        labels={'Unique_Customers': 'Nombre de Clients', 'Total_Revenue': 'Revenus ($)'}
    )
    
    fig5.update_layout(
        template='plotly_white',
        height=500
    )
    
    fig5.show()
    
    return product_metrics, fig5

# Analyse performance produits
product_data, product_fig = analyze_product_performance(df)


üì¶ ANALYSE PERFORMANCE PRODUITS
üèÜ TOP 10 PRODUITS (Revenus) :
  üì¶ DOT: $322,647.47 | 1429 clients | $225.79/client
      üìù DOTCOM POSTAGE...
  üì¶ 22423: $319,187.90 | 1951 clients | $163.60/client
      üìù REGENCY CAKESTAND 3 TIER...
  üì¶ 85123A: $248,519.61 | 1996 clients | $124.51/client
      üìù WHITE HANGING HEART T-LIGHT HOLDER...
  üì¶ 47566: $147,351.65 | 1515 clients | $97.26/client
      üìù PARTY BUNTING...
  üì¶ 85099B: $144,023.86 | 1529 clients | $94.19/client
      üìù JUMBO BAG RED RETROSPOT...
  üì¶ 84879: $128,691.54 | 1176 clients | $109.43/client
      üìù ASSORTED COLOUR BIRD ORNAMENT...
  üì¶ 22086: $118,683.69 | 1243 clients | $95.48/client
      üìù PAPER CHAIN KIT 50'S CHRISTMAS ...
  üì¶ POST: $110,430.41 | 592 clients | $186.54/client
      üìù POSTAGE...
  üì¶ 79321: $81,078.32 | 537 clients | $150.98/client
      üìù CHILLI LIGHTS...
  üì¶ 84347: $72,196.52 | 550 clients | $131.27/client
      üìù ROTATING SILVER ANGELS T-LIG

In [30]:
# ============================================================================
# üìÖ ANALYSE SAISONNALIT√â D√âTAILL√âE - NOTEBOOK 3
# ============================================================================

def seasonal_analysis_complete(df):
    """
    Analyse compl√®te des patterns saisonniers
    """
    print("üìÖ ANALYSE SAISONNALIT√â COMPL√àTE")
    print("=" * 40)
    
    # Pr√©paration des donn√©es temporelles
    df['Year'] = df['InvoiceDate'].dt.year
    df['Month'] = df['InvoiceDate'].dt.month
    df['MonthName'] = df['InvoiceDate'].dt.month_name()
    df['Quarter'] = df['InvoiceDate'].dt.quarter
    df['DayOfWeek'] = df['InvoiceDate'].dt.dayofweek
    df['DayName'] = df['InvoiceDate'].dt.day_name()
    df['Hour'] = df['InvoiceDate'].dt.hour
    df['Week'] = df['InvoiceDate'].dt.isocalendar().week
    
    # =======================================================================
    # 1. ANALYSE MENSUELLE D√âTAILL√âE
    # =======================================================================
    
    monthly_summary = df.groupby(['Year', 'Month', 'MonthName']).agg({
        'Total_Amount': ['sum', 'mean', 'count'],
        'Invoice': 'nunique',
        'Customer ID': 'nunique',
        'Quantity': 'sum'
    }).round(2)
    
    # Flatten column names
    monthly_summary.columns = ['_'.join(col).strip() for col in monthly_summary.columns]
    monthly_summary = monthly_summary.reset_index()
    
    # Calcul des m√©triques business
    monthly_summary['Revenue_per_Order'] = (
        monthly_summary['Total_Amount_sum'] / monthly_summary['Invoice_nunique']
    )
    monthly_summary['Revenue_per_Customer'] = (
        monthly_summary['Total_Amount_sum'] / monthly_summary['Customer ID_nunique']
    )
    
    print("üìä R√âSUM√â MENSUEL :")
    print(monthly_summary.head())
    
    # Graphique 1: √âvolution mensuelle multi-m√©triques
    fig1 = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Revenus Mensuels', 'Nombre de Commandes', 
                       'Clients Uniques', 'Panier Moyen'),
        specs=[[{"secondary_y": False}, {"secondary_y": False}],
               [{"secondary_y": False}, {"secondary_y": False}]]
    )
    
    # Donn√©es pour graphique
    month_data = monthly_summary.groupby('Month').agg({
        'Total_Amount_sum': 'mean',
        'Invoice_nunique': 'mean',
        'Customer ID_nunique': 'mean',
        'Revenue_per_Order': 'mean'
    }).reset_index()
    
    # Ajout des traces
    fig1.add_trace(
        go.Scatter(x=month_data['Month'], y=month_data['Total_Amount_sum'],
                  mode='lines+markers', name='Revenus', line=dict(color='#1f77b4')),
        row=1, col=1
    )
    
    fig1.add_trace(
        go.Scatter(x=month_data['Month'], y=month_data['Invoice_nunique'],
                  mode='lines+markers', name='Commandes', line=dict(color='#ff7f0e')),
        row=1, col=2
    )
    
    fig1.add_trace(
        go.Scatter(x=month_data['Month'], y=month_data['Customer ID_nunique'],
                  mode='lines+markers', name='Clients', line=dict(color='#2ca02c')),
        row=2, col=1
    )
    
    fig1.add_trace(
        go.Scatter(x=month_data['Month'], y=month_data['Revenue_per_Order'],
                  mode='lines+markers', name='Panier Moyen', line=dict(color='#d62728')),
        row=2, col=2
    )
    
    fig1.update_layout(
        title='üìÖ ANALYSE SAISONNALIT√â MENSUELLE',
        height=600,
        showlegend=False,
        template='plotly_white'
    )
    
    fig1.show()
    
    # =======================================================================
    # 2. ANALYSE TRIMESTRIELLE
    # =======================================================================
    
    quarterly_data = df.groupby(['Year', 'Quarter']).agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    # Calcul croissance trimestrielle
    quarterly_data['Quarter_Label'] = 'Q' + quarterly_data['Quarter'].astype(str) + ' ' + quarterly_data['Year'].astype(str)
    quarterly_data['Revenue_Growth'] = quarterly_data['Total_Amount'].pct_change() * 100
    
    print(f"\nüìà ANALYSE TRIMESTRIELLE :")
    print(quarterly_data)
    
    # Graphique 2: Performance trimestrielle
    fig2 = px.bar(
        quarterly_data,
        x='Quarter_Label',
        y='Total_Amount',
        title='üìä REVENUS PAR TRIMESTRE',
        labels={'Total_Amount': 'Revenus ( $ )', 'Quarter_Label': 'Trimestre'}
    )
    
    fig2.update_layout(
        template='plotly_white',
        height=400
    )
    
    fig2.show()
    
    # =======================================================================
    # 3. ANALYSE HEBDOMADAIRE
    # =======================================================================
    
    weekly_pattern = df.groupby(['DayOfWeek', 'DayName']).agg({
        'Total_Amount': ['sum', 'mean'],
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    weekly_pattern.columns = ['DayOfWeek', 'DayName', 'Total_Revenue', 'Avg_Revenue',
                             'Total_Orders', 'Unique_Customers']
    
    # R√©organiser les jours
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    weekly_pattern['DayName'] = pd.Categorical(weekly_pattern['DayName'], categories=day_order, ordered=True)
    weekly_pattern = weekly_pattern.sort_values('DayName')
    
    print(f"\nüìÖ PATTERN HEBDOMADAIRE :")
    print(weekly_pattern)
    
    # Graphique 3: Pattern hebdomadaire
    fig3 = px.bar(
        weekly_pattern,
        x='DayName',
        y='Total_Revenue',
        title='üìä PATTERN HEBDOMADAIRE DES VENTES',
        labels={'Total_Revenue': 'Revenus ( $ )', 'DayName': 'Jour de la Semaine'}
    )
    
    fig3.update_layout(
        template='plotly_white',
        height=400
    )
    
    fig3.show()
    
    # =======================================================================
    # 4. ANALYSE HORAIRE
    # =======================================================================
    
    hourly_pattern = df.groupby('Hour').agg({
        'Total_Amount': ['sum', 'mean'],
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    hourly_pattern.columns = ['Hour', 'Total_Revenue', 'Avg_Revenue', 'Total_Orders', 'Unique_Customers']
    
    print(f"\nüïê PATTERN HORAIRE :")
    peak_hours = hourly_pattern.nlargest(5, 'Total_Revenue')
    print("Top 5 heures de pointe :")
    print(peak_hours[['Hour', 'Total_Revenue', 'Total_Orders']])
    
    # Graphique 4: Pattern horaire
    fig4 = px.line(
        hourly_pattern,
        x='Hour',
        y='Total_Revenue',
        title='üìä PATTERN HORAIRE DES VENTES',
        labels={'Total_Revenue': 'Revenus ($)', 'Hour': 'Heure'}
    )
    
    fig4.update_layout(
        template='plotly_white',
        height=400
    )
    
    fig4.show()
    
    # =======================================================================
    # 5. INSIGHTS SAISONNIERS
    # =======================================================================
    
    print(f"\nüéØ INSIGHTS SAISONNIERS CL√âS :")
    print("=" * 30)
    
    # Meilleur mois
    best_month = monthly_summary.loc[monthly_summary['Total_Amount_sum'].idxmax()]
    worst_month = monthly_summary.loc[monthly_summary['Total_Amount_sum'].idxmin()]
    
    print(f"üìà MEILLEUR MOIS : {best_month['MonthName']} {best_month['Year']}")
    print(f"   üí∞ Revenus: ${best_month['Total_Amount_sum']:,.2f}")
    print(f"   üì¶ Commandes: {best_month['Invoice_nunique']}")
    print(f"   üë• Clients: {best_month['Customer ID_nunique']}")
    
    print(f"\nüìâ MOIS LE PLUS FAIBLE : {worst_month['MonthName']} {worst_month['Year']}")
    print(f"   üí∞ Revenus: ${worst_month['Total_Amount_sum']:,.2f}")
    print(f"   üì¶ Commandes: {worst_month['Invoice_nunique']}")
    
    # Meilleur jour
    best_day = weekly_pattern.loc[weekly_pattern['Total_Revenue'].idxmax()]
    worst_day = weekly_pattern.loc[weekly_pattern['Total_Revenue'].idxmin()]
    
    print(f"\nüìà MEILLEUR JOUR : {best_day['DayName']}")
    print(f"   üí∞ Revenus moyens: ${best_day['Total_Revenue']:,.2f}")
    print(f"   üì¶ Commandes: {best_day['Total_Orders']}")
    
    print(f"\nüìâ JOUR LE PLUS FAIBLE : {worst_day['DayName']}")
    print(f"   üí∞ Revenus moyens: ${worst_day['Total_Revenue']:,.2f}")
    
    # Heures de pointe
    peak_hour = hourly_pattern.loc[hourly_pattern['Total_Revenue'].idxmax()]
    low_hour = hourly_pattern.loc[hourly_pattern['Total_Revenue'].idxmin()]
    
    print(f"\nüïê HEURE DE POINTE : {peak_hour['Hour']}h")
    print(f"   üí∞ Revenus: ${peak_hour['Total_Revenue']:,.2f}")
    print(f"   üì¶ Commandes: {peak_hour['Total_Orders']}")
    
    print(f"\nüïê HEURE CREUSE : {low_hour['Hour']}h")
    print(f"   üí∞ Revenus: ${low_hour['Total_Revenue']:,.2f}")
    
    return {
        'monthly_summary': monthly_summary,
        'quarterly_data': quarterly_data,
        'weekly_pattern': weekly_pattern,
        'hourly_pattern': hourly_pattern,
        'figures': [fig1, fig2, fig3, fig4]
    }

# Ex√©cution de l'analyse saisonnalit√©
seasonal_results = seasonal_analysis_complete(df)


üìÖ ANALYSE SAISONNALIT√â COMPL√àTE
üìä R√âSUM√â MENSUEL :
   Year  Month MonthName  Total_Amount_sum  Total_Amount_mean  \
0  2009     12  December         796700.16              17.81   
1  2010      1   January         622516.50              19.93   
2  2010      2  February         531288.27              18.28   
3  2010      3     March         763271.59              18.62   
4  2010      4     April         587953.24              17.47   

   Total_Amount_count  Invoice_nunique  Customer ID_nunique  Quantity_sum  \
0               44744             2330                14513        418597   
1               31238             1633                 9902        374524   
2               29060             1969                 6289        367503   
3               40989             2367                 9508        488021   
4               33653             1892                 7222        350533   

   Revenue_per_Order  Revenue_per_Customer  
0             341.93                 54.

In [35]:
# ============================================================================
# üîç D√âTECTION D'ANOMALIES - NOTEBOOK 3
# ============================================================================

def detect_anomalies_complete(df):
    """
    D√©tection compl√®te des anomalies dans les donn√©es e-commerce
    """
    print("üîç D√âTECTION D'ANOMALIES COMPL√àTE")
    print("=" * 40)
    
    # Imports pour d√©tection d'anomalies
    from scipy import stats
    from sklearn.ensemble import IsolationForest
    from sklearn.preprocessing import StandardScaler
    import numpy as np
    
    # =======================================================================
    # 1. D√âTECTION ANOMALIES TRANSACTIONS
    # =======================================================================
    
    print("üí∞ ANALYSE DES ANOMALIES TRANSACTIONNELLES")
    print("-" * 45)
    
    # Statistiques descriptives
    transaction_stats = df.groupby('Invoice').agg({
        'Total_Amount': 'sum',
        'Quantity': 'sum',
        'StockCode': 'nunique'
    }).reset_index()
    
    transaction_stats.columns = ['Invoice', 'Total_Order_Value', 'Total_Items', 'Unique_Products']
    
    # Calcul des seuils d'anomalie (m√©thode IQR)
    def detect_outliers_iqr(data, column):
        Q1 = data[column].quantile(0.25)
        Q3 = data[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        
        outliers = data[(data[column] < lower_bound) | (data[column] > upper_bound)]
        return outliers, lower_bound, upper_bound
    
    # D√©tection anomalies valeur commande
    order_anomalies, lower_order, upper_order = detect_outliers_iqr(
        transaction_stats, 'Total_Order_Value'
    )
    
    print(f"üìä STATISTIQUES VALEUR COMMANDE :")
    print(f"   Moyenne: ${transaction_stats['Total_Order_Value'].mean():.2f}")
    print(f"   M√©diane: ${transaction_stats['Total_Order_Value'].median():.2f}")
    print(f"   √âcart-type: ${transaction_stats['Total_Order_Value'].std():.2f}")
    print(f"   Seuil inf√©rieur: ${lower_order:.2f}")
    print(f"   Seuil sup√©rieur: ${upper_order:.2f}")
    print(f"   üö® Anomalies d√©tect√©es: {len(order_anomalies)} commandes")
    
    # Top anomalies par valeur
    print(f"\nüî• TOP 10 COMMANDES ANORMALEMENT √âLEV√âES :")
    top_anomalies = order_anomalies.nlargest(10, 'Total_Order_Value')
    for _, row in top_anomalies.iterrows():
        print(f"   üìã {row['Invoice']}: ${row['Total_Order_Value']:,.2f} | "
              f"{row['Total_Items']} items | {row['Unique_Products']} produits")
    
    # Graphique 1: Distribution avec anomalies
    fig1 = go.Figure()
    
    # Histogramme normal
    fig1.add_trace(go.Histogram(
        x=transaction_stats['Total_Order_Value'],
        nbinsx=50,
        name='Commandes normales',
        opacity=0.7
    ))
    
    # Ligne seuil sup√©rieur
    fig1.add_vline(
        x=upper_order,
        line_dash="dash",
        line_color="red",
        annotation_text=f"Seuil anomalie:  $ {upper_order:.0f}"
    )
    
    fig1.update_layout(
        title='üìä DISTRIBUTION VALEUR COMMANDES + SEUIL ANOMALIES',
        xaxis_title='Valeur Commande ( $ )',
        yaxis_title='Fr√©quence',
        template='plotly_white',
        height=400
    )
    
    fig1.show()
    
    # =======================================================================
    # 2. D√âTECTION ANOMALIES PRODUITS
    # =======================================================================
    
    print(f"\nüì¶ ANALYSE DES ANOMALIES PRODUITS")
    print("-" * 40)
    
    # Analyse par produit
    product_anomalies = df.groupby('StockCode').agg({
        'Total_Amount': 'sum',
        'Quantity': 'sum',
        'Price': ['mean', 'std', 'min', 'max'],
        'Customer ID': 'nunique'
    }).reset_index()
    
    # Flatten columns
    product_anomalies.columns = ['StockCode', 'Total_Revenue', 'Total_Quantity', 
                                'Avg_Price', 'Price_Std', 'Min_Price', 'Max_Price', 'Unique_Customers']
    
    # D√©tection prix anormaux
    product_anomalies['Price_Variation'] = product_anomalies['Price_Std'] / product_anomalies['Avg_Price']
    product_anomalies['Price_Range'] = product_anomalies['Max_Price'] - product_anomalies['Min_Price']
    
    # Produits avec variations de prix suspectes
    price_anomalies = product_anomalies[
        (product_anomalies['Price_Variation'] > 1.0) |  # Variation > 100%
        (product_anomalies['Price_Range'] > product_anomalies['Avg_Price'] * 2)  # Range > 200% du prix moyen
    ].sort_values('Price_Variation', ascending=False)
    
    print(f"üö® PRODUITS AVEC VARIATIONS DE PRIX SUSPECTES : {len(price_anomalies)}")
    print("Top 10 variations de prix :")
    for _, row in price_anomalies.head(10).iterrows():
        print(f"   üì¶ {row['StockCode']}: Variation {row['Price_Variation']:.2f}x | "
              f"Prix: ${row['Min_Price']:.2f} -  $ {row['Max_Price']:.2f}")
    
    # Graphique 2: Scatter plot prix vs quantit√©
    product_anomalies['Total_Revenue_Abs'] = product_anomalies['Total_Revenue'].abs()
    fig2 = px.scatter(
        product_anomalies.head(1000),  # Top 1000 pour lisibilit√©
        x='Avg_Price',
        y='Total_Quantity',
        size='Total_Revenue_Abs',
        color='Price_Variation',
        hover_data=['StockCode'],
        title='üìä D√âTECTION ANOMALIES PRODUITS : PRIX vs QUANTIT√â',
        labels={'Avg_Price': 'Prix Moyen ( $ )', 'Total_Quantity': 'Quantit√© Totale'},
        color_continuous_scale='Viridis'
    )
    
    fig2.update_layout(
        template='plotly_white',
        height=500
    )
    
    fig2.show()
    
    # =======================================================================
    # 3. D√âTECTION ANOMALIES CLIENTS
    # =======================================================================
    
    print(f"\nüë• ANALYSE DES ANOMALIES CLIENTS")
    print("-" * 35)
    
    # Analyse comportement client
    customer_behavior = df.groupby('Customer ID').agg({
        'Total_Amount': ['sum', 'mean', 'count'],
        'Invoice': 'nunique',
        'StockCode': 'nunique',
        'Quantity': 'sum'
    }).reset_index()
    
    customer_behavior.columns = ['Customer_ID', 'Total_Spent', 'Avg_Transaction', 
                               'Total_Transactions', 'Unique_Orders', 'Unique_Products', 'Total_Items']
    
    # M√©triques clients
    customer_behavior['Avg_Items_per_Order'] = customer_behavior['Total_Items'] / customer_behavior['Unique_Orders']
    customer_behavior['Avg_Products_per_Order'] = customer_behavior['Unique_Products'] / customer_behavior['Unique_Orders']
    
    # D√©tection clients anormaux avec Isolation Forest
    features_for_anomaly = ['Total_Spent', 'Avg_Transaction', 'Total_Transactions', 
                           'Unique_Products', 'Avg_Items_per_Order']
    
    # Pr√©paration des donn√©es
    X = customer_behavior[features_for_anomaly].fillna(0)
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    # Isolation Forest
    iso_forest = IsolationForest(contamination=0.05, random_state=42)
    customer_behavior['Anomaly'] = iso_forest.fit_predict(X_scaled)
    customer_behavior['Anomaly_Score'] = iso_forest.decision_function(X_scaled)
    
    # Clients anormaux
    anomalous_customers = customer_behavior[customer_behavior['Anomaly'] == -1].sort_values('Anomaly_Score')
    
    print(f"üö® CLIENTS ANORMAUX D√âTECT√âS : {len(anomalous_customers)}")
    print("Top 10 clients les plus anormaux :")
    for _, row in anomalous_customers.head(10).iterrows():
        print(f"   üë§ Client {row['Customer_ID']}:  $ {row['Total_Spent']:,.2f} | "
              f"{row['Unique_Orders']} commandes | Score: {row['Anomaly_Score']:.3f}")
    
    # Graphique 3: Visualisation anomalies clients
    fig3 = px.scatter(
        customer_behavior,
        x='Total_Spent',
        y='Avg_Transaction',
        color='Anomaly',
        color_discrete_map={1: 'blue', -1: 'red'},
        title='üìä D√âTECTION ANOMALIES CLIENTS',
        labels={'Total_Spent': 'D√©pense Totale ( $ )', 'Avg_Transaction': 'Transaction Moyenne ($)'},
        hover_data=['Customer_ID', 'Unique_Orders']
    )
    
    fig3.update_layout(
        template='plotly_white',
        height=500
    )
    
    fig3.show()
    
    # =======================================================================
    # 4. D√âTECTION ANOMALIES TEMPORELLES
    # =======================================================================
    
    print(f"\n‚è∞ ANALYSE DES ANOMALIES TEMPORELLES")
    print("-" * 40)
    
    # Agr√©gation journali√®re
    daily_sales = df.groupby(df['InvoiceDate'].dt.date).agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique',
        'Customer ID': 'nunique'
    }).reset_index()
    
    daily_sales.columns = ['Date', 'Daily_Revenue', 'Daily_Orders', 'Daily_Customers']
    
    # Z-score pour d√©tection anomalies temporelles
    daily_sales['Revenue_ZScore'] = np.abs(stats.zscore(daily_sales['Daily_Revenue']))
    daily_sales['Orders_ZScore'] = np.abs(stats.zscore(daily_sales['Daily_Orders']))
    
    # Jours anormaux (Z-score > 2)
    temporal_anomalies = daily_sales[
        (daily_sales['Revenue_ZScore'] > 2) | 
        (daily_sales['Orders_ZScore'] > 2)
    ].sort_values('Revenue_ZScore', ascending=False)
    
    print(f"üö® JOURS AVEC ACTIVIT√â ANORMALE : {len(temporal_anomalies)}")
    print("Top 10 jours anormaux :")
    for _, row in temporal_anomalies.head(10).iterrows():
        print(f"   üìÖ {row['Date']}:  $ {row['Daily_Revenue']:,.2f} | "
              f"{row['Daily_Orders']} commandes | Z-score: {row['Revenue_ZScore']:.2f}")
    
    # Graphique 4: S√©rie temporelle avec anomalies
    fig4 = go.Figure()
    
    # S√©rie normale
    fig4.add_trace(go.Scatter(
        x=daily_sales['Date'],
        y=daily_sales['Daily_Revenue'],
        mode='lines',
        name='Revenus journaliers',
        line=dict(color='blue')
    ))
    
    # Points anomalies
    fig4.add_trace(go.Scatter(
        x=temporal_anomalies['Date'],
        y=temporal_anomalies['Daily_Revenue'],
        mode='markers',
        name='Anomalies',
        marker=dict(color='red', size=8),
    ))
    
    fig4.update_layout(
        title='üìä D√âTECTION ANOMALIES TEMPORELLES',
        xaxis_title='Date',
        yaxis_title='Revenus Journaliers ( $ )',
        template='plotly_white',
        height=500
    )
    
    fig4.show()
    
    # =======================================================================
    # 5. R√âSUM√â ANOMALIES
    # =======================================================================
    
    print(f"\nüéØ R√âSUM√â D√âTECTION D'ANOMALIES")
    print("=" * 35)
    print(f"üìä TRANSACTIONS ANORMALES : {len(order_anomalies)} commandes")
    print(f"üì¶ PRODUITS SUSPECTS : {len(price_anomalies)} produits")
    print(f"üë• CLIENTS ANORMAUX : {len(anomalous_customers)} clients")
    print(f"‚è∞ JOURS ANORMAUX : {len(temporal_anomalies)} jours")
    
    # Pourcentages
    total_transactions = len(transaction_stats)
    total_products = len(product_anomalies)
    total_customers = len(customer_behavior)
    total_days = len(daily_sales)
    
    print(f"\nüìà POURCENTAGES D'ANOMALIES :")
    print(f"   Transactions: {len(order_anomalies)/total_transactions*100:.1f}%")
    print(f"   Produits: {len(price_anomalies)/total_products*100:.1f}%")
    print(f"   Clients: {len(anomalous_customers)/total_customers*100:.1f}%")
    print(f"   Jours: {len(temporal_anomalies)/total_days*100:.1f}%")
    
    # Recommandations
    print(f"\nüí° RECOMMANDATIONS :")
    print("üîç ACTIONS IMM√âDIATES :")
    print("   ‚Ä¢ V√©rifier les commandes > $10,000")
    print("   ‚Ä¢ Investiguer les variations de prix > 100%")
    print("   ‚Ä¢ Analyser les clients avec score anomalie < -0.5")
    print("   ‚Ä¢ Examiner les pics de ventes inexpliqu√©s")
    
    print("üõ°Ô∏è MESURES PR√âVENTIVES :")
    print("   ‚Ä¢ Alertes automatiques pour commandes > seuil")
    print("   ‚Ä¢ Validation manuelle pour variations prix > 50%")
    print("   ‚Ä¢ Monitoring comportement clients VIP")
    print("   ‚Ä¢ Syst√®me de d√©tection temps r√©el")
    
    return {
        'transaction_anomalies': order_anomalies,
        'product_anomalies': price_anomalies,
        'customer_anomalies': anomalous_customers,
        'temporal_anomalies': temporal_anomalies,
        'figures': [fig1, fig2, fig3, fig4],
        'summary_stats': {
            'total_transaction_anomalies': len(order_anomalies),
            'total_product_anomalies': len(price_anomalies),
            'total_customer_anomalies': len(anomalous_customers),
            'total_temporal_anomalies': len(temporal_anomalies)
        }
    }

# Ex√©cution de la d√©tection d'anomalies
anomaly_results = detect_anomalies_complete(df)


üîç D√âTECTION D'ANOMALIES COMPL√àTE
üí∞ ANALYSE DES ANOMALIES TRANSACTIONNELLES
---------------------------------------------
üìä STATISTIQUES VALEUR COMMANDE :
   Moyenne: $352.97
   M√©diane: $192.34
   √âcart-type: $1645.25
   Seuil inf√©rieur: $-592.48
   Seuil sup√©rieur: $987.46
   üö® Anomalies d√©tect√©es: 3972 commandes

üî• TOP 10 COMMANDES ANORMALEMENT √âLEV√âES :
   üìã 581483: $168,469.60 | 80995 items | 1 produits
   üìã 541431: $77,183.60 | 74215 items | 1 produits
   üìã 574941: $52,940.94 | 14149 items | 101 produits
   üìã 576365: $50,653.91 | 13956 items | 99 produits
   üìã 533027: $49,844.99 | 13387 items | 111 produits
   üìã 531516: $45,332.97 | 12410 items | 115 produits
   üìã 493819: $44,051.60 | 25018 items | 94 produits
   üìã 556444: $38,970.00 | 60 items | 1 produits
   üìã 524181: $33,167.80 | 8820 items | 14 produits
   üìã 567423: $31,698.16 | 12572 items | 12 produits

üì¶ ANALYSE DES ANOMALIES PRODUITS
--------------------------------

In [36]:
# ============================================================================
# üìã RAPPORT FINAL + RECOMMANDATIONS - NOTEBOOK 3
# ============================================================================

def generate_final_report(df, seasonal_results, anomaly_results):
    """
    G√©n√©ration du rapport final avec recommandations strat√©giques
    """
    print("üìã RAPPORT FINAL - ANALYSE EXPLORATOIRE")
    print("=" * 50)
    
    # =======================================================================
    # 1. SYNTH√àSE EX√âCUTIVE
    # =======================================================================
    
    print("üéØ SYNTH√àSE EX√âCUTIVE")
    print("=" * 20)
    
    # M√©triques cl√©s globales
    total_revenue = df['Total_Amount'].sum()
    total_transactions = df['Invoice'].nunique()
    total_customers = df['Customer ID'].nunique()
    total_products = df['StockCode'].nunique()
    avg_order_value = total_revenue / total_transactions
    
    # P√©riode d'analyse
    start_date = df['InvoiceDate'].min()
    end_date = df['InvoiceDate'].max()
    analysis_period = (end_date - start_date).days
    
    print(f"üìä M√âTRIQUES BUSINESS GLOBALES")
    print(f"   üí∞ Chiffre d'affaires total: ${total_revenue:,.2f}")
    print(f"   üì¶ Nombre de commandes: {total_transactions:,}")
    print(f"   üë• Clients uniques: {total_customers:,}")
    print(f"   üõçÔ∏è Produits uniques: {total_products:,}")
    print(f"   üí≥ Panier moyen: ${avg_order_value:.2f}")
    print(f"   üìÖ P√©riode analys√©e: {analysis_period} jours ({start_date.strftime('%Y-%m-%d')} ‚Üí {end_date.strftime('%Y-%m-%d')})")
    
    # =======================================================================
    # 2. INSIGHTS CL√âS PAR DIMENSION
    # =======================================================================
    
    print(f"\nüîç INSIGHTS CL√âS D√âCOUVERTS")
    print("=" * 30)
    
    # A. INSIGHTS CLIENTS
    print("üë• DIMENSION CLIENTS :")
    customer_segments = df.groupby('Customer ID').agg({
        'Total_Amount': 'sum',
        'Invoice': 'nunique'
    }).reset_index()
    
    # Segmentation clients
    top_20_customers = customer_segments.nlargest(int(len(customer_segments) * 0.2), 'Total_Amount')
    top_20_revenue = top_20_customers['Total_Amount'].sum()
    
    print(f"   üíé Top 20% clients g√©n√®rent {top_20_revenue/total_revenue*100:.1f}% du CA")
    print(f"   üìà Client le plus valuable: ${customer_segments['Total_Amount'].max():,.2f}")
    print(f"   üîÑ Commandes moyennes par client: {customer_segments['Invoice'].mean():.1f}")
    
    # B. INSIGHTS PRODUITS
    print(f"\nüì¶ DIMENSION PRODUITS :")
    product_performance = df.groupby('StockCode').agg({
        'Total_Amount': 'sum',
        'Quantity': 'sum'
    }).reset_index()
    
    top_products = product_performance.nlargest(10, 'Total_Amount')
    top_10_revenue = top_products['Total_Amount'].sum()
    
    print(f"   üèÜ Top 10 produits repr√©sentent {top_10_revenue/total_revenue*100:.1f}% du CA")
    print(f"   üéØ Produit star:  $ {product_performance['Total_Amount'].max():,.2f} de revenus")
    print(f"   üìä 80/20 Rule: {(product_performance['Total_Amount'] > product_performance['Total_Amount'].quantile(0.8)).sum()} produits g√©n√®rent 80% du CA")
    
    # C. INSIGHTS TEMPORELS
    print(f"\n‚è∞ DIMENSION TEMPORELLE :")
    
    # Meilleurs mois (depuis seasonal_results)
    monthly_data = df.groupby(df['InvoiceDate'].dt.month).agg({
        'Total_Amount': 'sum'
    }).reset_index()
    
    best_month = monthly_data.loc[monthly_data['Total_Amount'].idxmax(), 'InvoiceDate']
    worst_month = monthly_data.loc[monthly_data['Total_Amount'].idxmin(), 'InvoiceDate']
    
    print(f"   üìà Meilleur mois: Mois {best_month} ({monthly_data['Total_Amount'].max():,.0f} $ )")
    print(f"   üìâ Mois le plus faible: Mois {worst_month} ({monthly_data['Total_Amount'].min():,.0f}$)")
    
    # Jour de la semaine
    weekday_performance = df.groupby(df['InvoiceDate'].dt.dayofweek)['Total_Amount'].sum()
    best_day = weekday_performance.idxmax()
    days_names = ['Lundi', 'Mardi', 'Mercredi', 'Jeudi', 'Vendredi', 'Samedi', 'Dimanche']
    
    print(f"   üóìÔ∏è Meilleur jour: {days_names[best_day]} ({weekday_performance.max():,.0f}$)")
    
    # =======================================================================
    # 3. ANALYSE DES ANOMALIES D√âTECT√âES
    # =======================================================================
    
    print(f"\nüö® ANOMALIES CRITIQUES D√âTECT√âES")
    print("=" * 35)
    
    # R√©cup√©ration des r√©sultats d'anomalies
    anom_stats = anomaly_results['summary_stats']
    
    print(f"üìä R√âSUM√â DES ANOMALIES :")
    print(f"   üí∞ Transactions suspectes: {anom_stats['total_transaction_anomalies']}")
    print(f"   üì¶ Produits avec prix anormaux: {anom_stats['total_product_anomalies']}")
    print(f"   üë• Clients au comportement atypique: {anom_stats['total_customer_anomalies']}")
    print(f"   ‚è∞ Jours avec activit√© anormale: {anom_stats['total_temporal_anomalies']}")
    
    # Impact des anomalies
    transaction_anomalies = anomaly_results['transaction_anomalies']
    if len(transaction_anomalies) > 0:
        anomaly_revenue = transaction_anomalies['Total_Order_Value'].sum()
        print(f"   üí∏ Impact financier anomalies: ${anomaly_revenue:,.2f} ({anomaly_revenue/total_revenue*100:.1f}% du CA)")
    
    # =======================================================================
    # 4. DASHBOARD R√âCAPITULATIF
    # =======================================================================
    
    print(f"\nüìä CR√âATION DU DASHBOARD R√âCAPITULATIF")
    print("=" * 40)
    
    # Dashboard avec 4 graphiques principaux
    fig_dashboard = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'üí∞ TOP 10 CLIENTS (CA)',
            'üì¶ TOP 10 PRODUITS (Revenus)',
            'üìÖ √âVOLUTION MENSUELLE',
            'üïê PERFORMANCE HORAIRE'
        ),
        specs=[[{"type": "bar"}, {"type": "bar"}],
               [{"type": "scatter"}, {"type": "bar"}]]
    )
    
    # 1. Top 10 clients
    top_customers = df.groupby('Customer ID')['Total_Amount'].sum().nlargest(10)
    fig_dashboard.add_trace(
        go.Bar(x=top_customers.index.astype(str), y=top_customers.values, name='Top Clients'),
        row=1, col=1
    )
    
    # 2. Top 10 produits
    top_products_viz = df.groupby('StockCode')['Total_Amount'].sum().nlargest(10)
    fig_dashboard.add_trace(
        go.Bar(x=top_products_viz.index, y=top_products_viz.values, name='Top Produits'),
        row=1, col=2
    )
    
    # 3. √âvolution mensuelle
    monthly_evolution = df.groupby(df['InvoiceDate'].dt.to_period('M'))['Total_Amount'].sum()
    fig_dashboard.add_trace(
        go.Scatter(x=monthly_evolution.index.astype(str), y=monthly_evolution.values, 
                  mode='lines+markers', name='CA Mensuel'),
        row=2, col=1
    )
    
    # 4. Performance horaire
    hourly_perf = df.groupby(df['InvoiceDate'].dt.hour)['Total_Amount'].sum()
    fig_dashboard.add_trace(
        go.Bar(x=hourly_perf.index, y=hourly_perf.values, name='CA par Heure'),
        row=2, col=2
    )
    
    fig_dashboard.update_layout(
        title_text="üìä DASHBOARD R√âCAPITULATIF - ANALYSE E-COMMERCE",
        showlegend=False,
        height=800,
        template='plotly_white'
    )
    
    fig_dashboard.show()
    
    # =======================================================================
    # 5. RECOMMANDATIONS STRAT√âGIQUES
    # =======================================================================
    
    print(f"\nüéØ RECOMMANDATIONS STRAT√âGIQUES")
    print("=" * 35)
    
    print("üöÄ RECOMMANDATIONS IMM√âDIATES (0-3 mois):")
    print("   1. üë• CLIENTS :")
    print("      ‚Ä¢ Cr√©er programme fid√©lit√© pour top 20% clients")
    print("      ‚Ä¢ Campagne de r√©activation clients inactifs")
    print("      ‚Ä¢ Analyse d√©taill√©e des clients √† forte valeur")
    print("      ‚Ä¢ Segmentation RFM pour personnalisation")
    
    print("   2. üì¶ PRODUITS :")
    print("      ‚Ä¢ Optimiser stock des produits stars")
    print("      ‚Ä¢ Analyser marge des top performers")
    print("      ‚Ä¢ Cross-selling sur produits compl√©mentaires")
    print("      ‚Ä¢ √âliminer produits low-performers")
    
    print("   3. üí∞ PRICING :")
    print("      ‚Ä¢ Investiguer variations prix anormales")
    print("      ‚Ä¢ Standardiser politique tarifaire")
    print("      ‚Ä¢ Tests A/B sur pricing dynamique")
    print("      ‚Ä¢ Monitoring concurrence")
    
    print("   4. üõ°Ô∏è CONTR√îLE QUALIT√â :")
    print("      ‚Ä¢ Syst√®me d'alertes pour commandes > $10K")
    print("      ‚Ä¢ Validation manuelle transactions suspectes")
    print("      ‚Ä¢ Audit des comptes clients anormaux")
    print("      ‚Ä¢ V√©rification des pics de ventes")
    
    print(f"\nüìà RECOMMANDATIONS MOYEN TERME (3-12 mois):")
    print("   1. üéØ STRAT√âGIE MARKETING :")
    print("      ‚Ä¢ Campagnes cibl√©es par segment client")
    print("      ‚Ä¢ Optimisation saisonni√®re des promotions")
    print("      ‚Ä¢ Marketing automation bas√© sur comportement")
    print("      ‚Ä¢ Strat√©gie omnicanal")
    
    print("   2. üìä ANALYTICS AVANC√âS :")
    print("      ‚Ä¢ Mod√®les pr√©dictifs de churn")
    print("      ‚Ä¢ Scoring de valeur client (CLV)")
    print("      ‚Ä¢ Recommandations produits IA")
    print("      ‚Ä¢ Forecasting des ventes")
    
    print("   3. üèóÔ∏è INFRASTRUCTURE :")
    print("      ‚Ä¢ Dashboard temps r√©el")
    print("      ‚Ä¢ Syst√®me de d√©tection anomalies automatique")
    print("      ‚Ä¢ Data pipeline optimis√©")
    print("      ‚Ä¢ Reporting automatis√©")
    
    # =======================================================================
    # 6. ROADMAP PROCHAINES ANALYSES
    # =======================================================================
    
    print(f"\nüó∫Ô∏è ROADMAP PROCHAINES ANALYSES")
    print("=" * 30)
    
    print("üìã NOTEBOOK 4 - FEATURE ENGINEERING :")
    print("   ‚Ä¢ Cr√©ation variables RFM")
    print("   ‚Ä¢ Features temporelles avanc√©es")
    print("   ‚Ä¢ Encoding cat√©gorielles")
    print("   ‚Ä¢ Features d'interaction")
    
    print("üìã NOTEBOOK 5 - MACHINE LEARNING :")
    print("   ‚Ä¢ Clustering clients (K-means)")
    print("   ‚Ä¢ Pr√©diction valeur client")
    print("   ‚Ä¢ Syst√®me de recommandation")
    print("   ‚Ä¢ D√©tection churn")
    
    print("üìã NOTEBOOK 6 - D√âPLOIEMENT :")
    print("   ‚Ä¢ API de pr√©diction")
    print("   ‚Ä¢ Dashboard interactif")
    print("   ‚Ä¢ Monitoring mod√®les")
    print("   ‚Ä¢ Documentation compl√®te")
    
    # =======================================================================
    # 7. M√âTRIQUES DE PERFORMANCE
    # =======================================================================
    
    print(f"\nüìà M√âTRIQUES DE PERFORMANCE √Ä SUIVRE")
    print("=" * 40)
    
    # Calcul des KPIs actuels
    monthly_growth = df.groupby(df['InvoiceDate'].dt.to_period('M'))['Total_Amount'].sum()
    if len(monthly_growth) > 1:
        last_month_growth = ((monthly_growth.iloc[-1] - monthly_growth.iloc[-2]) / monthly_growth.iloc[-2] * 100)
    else:
        last_month_growth = 0
    
    print("üéØ KPIs ACTUELS :")
    print(f"   üí∞ CA mensuel moyen: ${monthly_growth.mean():,.2f}")
    print(f"   üìà Croissance dernier mois: {last_month_growth:.1f}%")
    print(f"   üë• Clients actifs/mois: {total_customers/(analysis_period/30):.0f}")
    print(f"   üõí Commandes/jour: {total_transactions/(analysis_period):.1f}")
    print(f"   üí≥ Panier moyen: ${avg_order_value:.2f}")
    
    print(f"\nüéØ KPIs √Ä MONITORER :")
    print("   ‚Ä¢ Taux de r√©tention client")
    print("   ‚Ä¢ Customer Lifetime Value (CLV)")
    print("   ‚Ä¢ Taux de conversion")
    print("   ‚Ä¢ Fr√©quence d'achat")
    print("   ‚Ä¢ Marge brute par segment")
    
    # Sauvegarde des r√©sultats
    report_summary = {
        'total_revenue': total_revenue,
        'total_transactions': total_transactions,
        'total_customers': total_customers,
        'avg_order_value': avg_order_value,
        'analysis_period': analysis_period,
        'anomalies_detected': anom_stats,
        'top_customers': top_customers.to_dict(),
        'top_products': top_products_viz.to_dict(),
        'monthly_growth': last_month_growth
    }
    
    print(f"\n‚úÖ RAPPORT FINAL G√âN√âR√â AVEC SUCC√àS !")
    print("=" * 35)
    
    return report_summary, fig_dashboard

# G√©n√©ration du rapport final
final_report, dashboard_fig = generate_final_report(df, seasonal_results, anomaly_results)

# =======================================================================
# CONCLUSION NOTEBOOK 3
# =======================================================================

print("\n" + "="*60)
print("üéâ NOTEBOOK 3 - ANALYSE EXPLORATOIRE TERMIN√â !")
print("="*60)

print(f"\nüìä R√âSUM√â DE CE QUE NOUS AVONS ACCOMPLI :")
print("   ‚úÖ Analyse exploratoire compl√®te")
print("   ‚úÖ Visualisations avanc√©es (15+ graphiques)")
print("   ‚úÖ Analyse saisonnalit√© d√©taill√©e")
print("   ‚úÖ D√©tection d'anomalies multi-dimensionnelle")
print("   ‚úÖ Insights business actionnables")
print("   ‚úÖ Recommandations strat√©giques")
print("   ‚úÖ Dashboard r√©capitulatif")
print("   ‚úÖ Roadmap pour la suite")

print(f"\nüöÄ PROCHAINE √âTAPE : NOTEBOOK 4 - FEATURE ENGINEERING")
print("   üéØ Objectif: Pr√©parer les donn√©es pour le ML")
print("   üõ†Ô∏è Techniques: RFM, Variables temporelles, Encoding")
print("   üìä R√©sultat: Dataset pr√™t pour mod√©lisation")

print(f"\nüíæ DONN√âES PR√äTES POUR LA SUITE :")
print(f"   ‚Ä¢ Dataset nettoy√©: {len(df)} lignes")
print(f"   ‚Ä¢ Variables cr√©√©es: {len(df.columns)} colonnes")
print(f"   ‚Ä¢ Anomalies identifi√©es: {sum(final_report['anomalies_detected'].values())}")
print(f"   ‚Ä¢ Insights business: 20+ recommandations")




üìã RAPPORT FINAL - ANALYSE EXPLORATOIRE
üéØ SYNTH√àSE EX√âCUTIVE
üìä M√âTRIQUES BUSINESS GLOBALES
   üí∞ Chiffre d'affaires total: $18,928,949.47
   üì¶ Nombre de commandes: 53,628
   üë• Clients uniques: 248,949
   üõçÔ∏è Produits uniques: 5,305
   üí≥ Panier moyen: $352.97
   üìÖ P√©riode analys√©e: 738 jours (2009-12-01 ‚Üí 2011-12-09)

üîç INSIGHTS CL√âS D√âCOUVERTS
üë• DIMENSION CLIENTS :
   üíé Top 20% clients g√©n√®rent 98.5% du CA
   üìà Client le plus valuable: $570,380.61
   üîÑ Commandes moyennes par client: 1.2

üì¶ DIMENSION PRODUITS :
   üèÜ Top 10 produits repr√©sentent 9.2% du CA
   üéØ Produit star:  $ 322,647.47 de revenus
   üìä 80/20 Rule: 1061 produits g√©n√®rent 80% du CA

‚è∞ DIMENSION TEMPORELLE :
   üìà Meilleur mois: Mois 11 (2,872,964 $ )
   üìâ Mois le plus faible: Mois 2 (1,028,339$)
   üóìÔ∏è Meilleur jour: Jeudi (3,902,397$)

üö® ANOMALIES CRITIQUES D√âTECT√âES
üìä R√âSUM√â DES ANOMALIES :
   üí∞ Transactions suspectes: 3972
   ü