# In [None]:
#!/usr/bin/env python3
# ===============================================
# PP_20 - KELLY CRITERION & BACKTEST (GOD SOTA 2026)
# TennisTitan - Optimal Bet Sizing & Performance Analysis
# ===============================================
#
# OBJECTIF:
# 1. Calculer le sizing optimal des paris (Kelly Criterion)
# 2. Backtest sur historique avec frais réalistes
# 3. Analyse de performance: ROI, Sharpe, Max Drawdown
# 4. Filtrage des value bets
#
# FORMULE KELLY:
# f* = (bp - q) / b
# où: b = odds - 1 (profit net par unité)
#     p = notre probabilité estimée
#     q = 1 - p
#
# Input: predictions/monte_carlo/ ou models/god_sota_2026/
# Output: betting/backtest_results/
# ===============================================

import numpy as np
import polars as pl
import pandas as pd
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple
import json
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# ===============================================
# CONFIGURATION
# ===============================================
# Project layout: every input/output directory hangs off ROOT.
ROOT = Path(r"C:\Users\Administrateur\Tennis POLAR v2")
DATA_DIR = ROOT.joinpath("data_clean", "ml_final")
MC_DIR = ROOT.joinpath("predictions", "monte_carlo")
OUTPUT_DIR = ROOT.joinpath("betting", "backtest_results")
# Created eagerly so later writes never fail on a missing directory.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# Betting parameters
INITIAL_BANKROLL = 10_000  # Starting capital
KELLY_FRACTION = 0.25      # Share of the full Kelly stake actually wagered (quarter Kelly)
MIN_EDGE = 0.02            # Minimum edge required to bet (2%)
MAX_BET_FRACTION = 0.05    # Cap: at most 5% of the bankroll on one bet
MIN_ODDS = 1.20            # Lowest decimal odds accepted
MAX_ODDS = 10.0            # Highest decimal odds accepted
COMMISSION = 0.05          # Bookmaker commission (5% of winnings)

# Start-up banner: run timestamp plus the key sizing parameters.
print(
    "=" * 70,
    "   PP_20 - KELLY CRITERION & BACKTEST (GOD SOTA 2026)",
    "=" * 70,
    f"   {datetime.now():%Y-%m-%d %H:%M:%S}",
    f"   Initial Bankroll: ${INITIAL_BANKROLL:,}",
    f"   Kelly Fraction: {KELLY_FRACTION}",
    f"   Min Edge: {MIN_EDGE:.1%}",
    "=" * 70,
    sep="\n",
)


# ===============================================
# KELLY CRITERION CALCULATOR
# ===============================================

class KellyCalculator:
    """
    Size bets with the (fractional) Kelly criterion.

    The Kelly criterion maximises the long-run geometric growth rate of the
    bankroll. For decimal odds ``o`` and an estimated win probability ``p``::

        f* = (b * p - q) / b        with  b = o - 1  and  q = 1 - p

    The full-Kelly fraction is scaled by ``kelly_fraction`` and capped at
    ``max_bet_fraction`` of the bankroll.
    """

    def __init__(self, kelly_fraction: float = 0.25,
                 min_edge: float = 0.02,
                 max_bet_fraction: float = 0.05,
                 min_odds: float = 1.20,
                 max_odds: float = 10.0):
        """
        Args:
            kelly_fraction: Multiplier applied to the full Kelly fraction.
            min_edge: Minimum edge (own probability minus implied probability)
                required before any stake is suggested.
            max_bet_fraction: Upper bound on the suggested bankroll fraction.
            min_odds: Lowest decimal odds that are considered.
            max_odds: Highest decimal odds that are considered.
        """
        self.kelly_fraction = kelly_fraction
        self.min_edge = min_edge
        self.max_bet_fraction = max_bet_fraction
        self.min_odds = min_odds
        self.max_odds = max_odds

    @staticmethod
    def _implied_probability(odds: float) -> float:
        """Return ``1 / odds``, or 0.0 when ``odds`` is not a positive number."""
        # ``odds > 0`` is False for zero, negative and NaN odds alike.
        return 1 / odds if odds > 0 else 0.0

    def calculate_kelly(self, p_win: float, odds: float) -> float:
        """
        Compute the fraction of the bankroll to stake.

        Args:
            p_win: Our estimated probability of winning the bet.
            odds: Decimal odds (e.g. 2.0 = even money).

        Returns:
            Bankroll fraction to stake; 0.0 when the bet is out of the odds
            range, the probability is not strictly between 0 and 1, the edge
            is below ``min_edge`` or the odds pay no net profit.
        """
        # Chained comparisons are False for NaN, so NaN inputs are rejected
        # explicitly instead of relying on min()/max() ordering quirks.
        if not (self.min_odds <= odds <= self.max_odds):
            return 0.0
        if not (0 < p_win < 1):
            return 0.0

        # b is the net profit per unit staked. Odds of 1.0 or less pay nothing,
        # and b == 0 would divide by zero below when the limits are loosened.
        b = odds - 1
        if b <= 0:
            return 0.0

        # Edge = our probability minus the probability implied by the odds.
        if self.calculate_edge(p_win, odds) < self.min_edge:
            return 0.0

        # Kelly formula: f* = (b * p - q) / b
        q = 1 - p_win
        kelly_full = (b * p_win - q) / b

        # Fractional Kelly, capped, and never negative.
        kelly_adj = min(kelly_full * self.kelly_fraction, self.max_bet_fraction)
        return max(0.0, kelly_adj)

    def calculate_edge(self, p_win: float, odds: float) -> float:
        """
        Compute the edge ``p_win - 1 / odds`` of a bet.

        Returns 0.0 (no measurable edge) when ``odds`` is not a positive
        number, instead of raising ``ZeroDivisionError``.
        """
        if not odds > 0:
            return 0.0
        return p_win - 1 / odds

    def calculate_ev(self, p_win: float, odds: float, stake: float = 1.0) -> float:
        """Compute the expected value of staking ``stake`` at ``odds``."""
        # EV = p_win * (odds - 1) * stake - (1 - p_win) * stake
        return p_win * (odds - 1) * stake - (1 - p_win) * stake

    def get_bet_recommendation(self, p_win: float, odds: float,
                                bankroll: float) -> Dict:
        """
        Build a complete recommendation for one side of a market.

        Args:
            p_win: Our estimated probability of winning the bet.
            odds: Decimal odds offered.
            bankroll: Current bankroll used to turn the fraction into a stake.

        Returns:
            Dict with keys ``should_bet``, ``kelly_fraction``, ``stake``,
            ``edge``, ``expected_value``, ``odds``, ``p_win`` and ``p_implied``.
        """
        kelly = self.calculate_kelly(p_win, odds)
        edge = self.calculate_edge(p_win, odds)
        stake = kelly * bankroll
        ev = self.calculate_ev(p_win, odds, stake)

        return {
            "should_bet": kelly > 0,
            "kelly_fraction": kelly,
            "stake": stake,
            "edge": edge,
            "expected_value": ev,
            "odds": odds,
            "p_win": p_win,
            "p_implied": self._implied_probability(odds),
        }


# ===============================================
# BACKTEST ENGINE
# ===============================================

@dataclass
class BetRecord:
    """Record of a single bet placed during the backtest."""
    match_id: str                # Identifier of the match the bet was placed on
    date: datetime               # Match date as supplied by the caller
    player_A: str                # Name of player A
    player_B: str                # Name of player B
    bet_on: str  # "A" or "B"
    p_win: float                 # Estimated win probability of the side that was backed
    odds: float                  # Decimal odds taken on the backed side
    stake: float                 # Amount staked
    kelly_fraction: float        # Bankroll fraction suggested by the Kelly calculator
    edge: float                  # Estimated probability minus the odds-implied probability
    result: Optional[str] = None  # "W" or "L"
    profit: Optional[float] = None          # Net profit once settled (negative stake on a loss)
    bankroll_after: Optional[float] = None  # Bankroll right after the bet was settled


@dataclass
class BacktestResults:
    """Complete results of a backtest run."""
    initial_bankroll: float      # Bankroll before the first bet
    final_bankroll: float        # Bankroll after the last bet
    total_bets: int              # Number of bets placed
    winning_bets: int            # Number of bets with result "W"
    losing_bets: int             # Number of bets with result "L"
    total_staked: float          # Sum of all stakes
    total_profit: float          # final_bankroll - initial_bankroll
    roi: float                   # total_profit / total_staked, as a fraction (0 when nothing was staked)
    win_rate: float              # winning_bets / total_bets, as a fraction
    avg_odds: float              # Mean decimal odds over all bets
    avg_stake: float             # Mean stake over all bets
    max_drawdown: float          # Largest peak-to-trough loss, in currency units
    max_drawdown_pct: float      # Largest peak-to-trough loss as a fraction of the peak
    sharpe_ratio: float          # Sharpe ratio as computed by the backtest engine
    profit_factor: float         # Gross wins / gross losses (inf when there are wins but no losses)
    bankroll_history: List[float] = field(default_factory=list)    # Starting bankroll, then bankroll after each bet
    bet_records: List[BetRecord] = field(default_factory=list)     # Every bet placed, in placement order
    monthly_returns: Dict[str, float] = field(default_factory=dict)  # Month key (e.g. "2024-01") -> summed profit


class BacktestEngine:
    """
    Backtest engine that replays a Kelly-sized betting strategy.

    Each call to :meth:`place_bet` evaluates both sides of a match, stakes the
    side with the higher expected value (if any side qualifies), settles it
    against the known outcome and updates the bankroll, the bet log and the
    running drawdown statistics. :meth:`get_results` summarises the run.
    """

    def __init__(self, initial_bankroll: float = INITIAL_BANKROLL,
                 kelly_calculator: KellyCalculator = None,
                 commission: float = COMMISSION):
        """
        Args:
            initial_bankroll: Starting capital.
            kelly_calculator: Sizing policy; a default ``KellyCalculator()`` is
                used when omitted.
            commission: Fraction of the gross winnings withheld on winning bets.
        """
        self.initial_bankroll = initial_bankroll
        self.bankroll = initial_bankroll
        self.kelly = kelly_calculator or KellyCalculator()
        self.commission = commission

        self.bet_records: List[BetRecord] = []
        self.bankroll_history = [initial_bankroll]
        self.peak_bankroll = initial_bankroll
        self.max_drawdown = 0
        self.max_drawdown_pct = 0

    def place_bet(self, match_id: str, date: datetime,
                  player_A: str, player_B: str,
                  p_A_wins: float, odds_A: float, odds_B: float,
                  actual_winner: str) -> Optional[BetRecord]:
        """
        Evaluate a match and place (and settle) a bet if a value bet exists.

        Args:
            match_id: Match identifier.
            date: Match date.
            player_A, player_B: Player names.
            p_A_wins: Our estimated probability that A wins.
            odds_A, odds_B: Decimal odds for A and B.
            actual_winner: "A" or "B" (the real outcome); any other value
                settles the bet as lost.

        Returns:
            The settled BetRecord if a bet was placed, None otherwise.
        """
        # A bankroll of zero (or less) cannot fund a stake. Without this guard
        # zero-stake bets would be logged and the return series computed in
        # get_results() would divide by a zero bankroll.
        if self.bankroll <= 0:
            return None

        p_B_wins = 1 - p_A_wins

        # Evaluate both sides of the market.
        rec_A = self.kelly.get_bet_recommendation(p_A_wins, odds_A, self.bankroll)
        rec_B = self.kelly.get_bet_recommendation(p_B_wins, odds_B, self.bankroll)

        # Back the qualifying side with the higher expected value; on an exact
        # tie between two qualifying sides B is chosen.
        if rec_A["should_bet"] and (
            not rec_B["should_bet"]
            or rec_A["expected_value"] > rec_B["expected_value"]
        ):
            bet_on, rec, odds, p_win = "A", rec_A, odds_A, p_A_wins
        elif rec_B["should_bet"]:
            bet_on, rec, odds, p_win = "B", rec_B, odds_B, p_B_wins
        else:
            return None

        bet = BetRecord(
            match_id=match_id,
            date=date,
            player_A=player_A,
            player_B=player_B,
            bet_on=bet_on,
            p_win=p_win,
            odds=odds,
            stake=rec["stake"],
            kelly_fraction=rec["kelly_fraction"],
            edge=rec["edge"],
        )

        # Settle the bet: commission is charged on gross winnings only.
        if actual_winner == bet_on:
            gross_profit = bet.stake * (odds - 1)
            bet.result = "W"
            bet.profit = gross_profit * (1 - self.commission)
        else:
            bet.result = "L"
            bet.profit = -bet.stake

        # Update the bankroll.
        self.bankroll += bet.profit
        bet.bankroll_after = self.bankroll

        # Track the running peak and the worst drawdown seen so far, both in
        # currency units and as a fraction of the peak.
        if self.bankroll > self.peak_bankroll:
            self.peak_bankroll = self.bankroll

        current_dd = self.peak_bankroll - self.bankroll
        current_dd_pct = current_dd / self.peak_bankroll if self.peak_bankroll > 0 else 0

        if current_dd > self.max_drawdown:
            self.max_drawdown = current_dd
        if current_dd_pct > self.max_drawdown_pct:
            self.max_drawdown_pct = current_dd_pct

        # Log the bet.
        self.bet_records.append(bet)
        self.bankroll_history.append(self.bankroll)

        return bet

    def calculate_sharpe_ratio(self, returns: List[float],
                                risk_free_rate: float = 0.02) -> float:
        """
        Compute the annualised Sharpe ratio of a series of daily returns.

        Args:
            returns: One fractional return per day.
            risk_free_rate: Annual risk-free rate, spread over 252 days.

        Returns:
            The annualised Sharpe ratio, or 0.0 when fewer than two returns
            are given or the returns have no dispersion.
        """
        if len(returns) < 2:
            return 0.0

        returns_array = np.array(returns)
        excess_returns = returns_array - risk_free_rate / 252  # Daily risk-free

        if excess_returns.std() == 0:
            return 0.0

        return float(np.sqrt(252) * excess_returns.mean() / excess_returns.std())

    def calculate_profit_factor(self) -> float:
        """
        Compute the profit factor (gross wins / gross losses).

        Returns ``inf`` when there are wins but no losses and 0.0 when there
        are neither.
        """
        gross_wins = sum(b.profit for b in self.bet_records if b.result == "W")
        gross_losses = abs(sum(b.profit for b in self.bet_records if b.result == "L"))

        if gross_losses == 0:
            return float('inf') if gross_wins > 0 else 0.0

        return gross_wins / gross_losses

    @staticmethod
    def _day_key(date):
        """Return the value used to decide whether two bets share a day."""
        # A datetime carries a time of day that must be dropped; every other
        # type (date, str, int, None, ...) is grouped by plain equality.
        return date.date() if isinstance(date, datetime) else date

    def _daily_returns(self) -> List[float]:
        """
        Collapse the per-bet bankroll path into one return per betting day.

        Consecutive bets sharing the same day key form one day. The day's
        return is (bankroll at its last bet - bankroll before its first bet)
        divided by the bankroll before its first bet. Days without bets
        contribute no entry.
        """
        returns: List[float] = []
        day_open = self.bankroll_history[0]
        day_close = day_open
        current_day = None

        settled = zip(self.bet_records, self.bankroll_history[1:])
        for index, (bet, bankroll_after) in enumerate(settled):
            day = self._day_key(bet.date)
            if index > 0 and day != current_day:
                # The previous day is over: close it and open the next one.
                if day_open > 0:
                    returns.append((day_close - day_open) / day_open)
                day_open = day_close
            current_day = day
            day_close = bankroll_after

        if self.bet_records and day_open > 0:
            returns.append((day_close - day_open) / day_open)
        return returns

    def get_results(self) -> BacktestResults:
        """
        Compile the backtest results.

        The Sharpe ratio is computed on per-day returns (bets aggregated by
        date), which is what the 252-day annualisation in
        :meth:`calculate_sharpe_ratio` assumes. It is therefore 0.0 when all
        bets fall on fewer than two distinct days.
        """
        if not self.bet_records:
            return BacktestResults(
                initial_bankroll=self.initial_bankroll,
                final_bankroll=self.bankroll,
                total_bets=0, winning_bets=0, losing_bets=0,
                total_staked=0, total_profit=0, roi=0, win_rate=0,
                avg_odds=0, avg_stake=0, max_drawdown=0, max_drawdown_pct=0,
                sharpe_ratio=0, profit_factor=0
            )

        winning_bets = [b for b in self.bet_records if b.result == "W"]
        losing_bets = [b for b in self.bet_records if b.result == "L"]

        total_staked = sum(b.stake for b in self.bet_records)
        total_profit = self.bankroll - self.initial_bankroll

        # Profit summed per calendar month ("YYYY-MM"); non-datetime dates are
        # keyed by the first seven characters of their string form.
        monthly_returns = {}
        for bet in self.bet_records:
            month_key = bet.date.strftime("%Y-%m") if isinstance(bet.date, datetime) else str(bet.date)[:7]
            monthly_returns[month_key] = monthly_returns.get(month_key, 0) + (bet.profit or 0)

        return BacktestResults(
            initial_bankroll=self.initial_bankroll,
            final_bankroll=self.bankroll,
            total_bets=len(self.bet_records),
            winning_bets=len(winning_bets),
            losing_bets=len(losing_bets),
            total_staked=total_staked,
            total_profit=total_profit,
            roi=total_profit / total_staked if total_staked > 0 else 0,
            win_rate=len(winning_bets) / len(self.bet_records),
            avg_odds=float(np.mean([b.odds for b in self.bet_records])),
            avg_stake=float(np.mean([b.stake for b in self.bet_records])),
            max_drawdown=self.max_drawdown,
            max_drawdown_pct=self.max_drawdown_pct,
            sharpe_ratio=self.calculate_sharpe_ratio(self._daily_returns()),
            profit_factor=self.calculate_profit_factor(),
            # Copies, so bets placed later do not mutate a returned snapshot.
            bankroll_history=list(self.bankroll_history),
            bet_records=list(self.bet_records),
            monthly_returns=monthly_returns,
        )


# ===============================================
# VALUE BET FINDER
# ===============================================

class ValueBetFinder:
    """
    Flag the value bets present in a dataset of matches.

    A side is a value bet when its edge (model probability minus the
    probability implied by the odds) reaches ``min_edge`` and its odds lie
    within ``[min_odds, max_odds]``.
    """

    def __init__(self, min_edge: float = MIN_EDGE,
                 min_odds: float = MIN_ODDS,
                 max_odds: float = MAX_ODDS):
        self.min_edge, self.min_odds, self.max_odds = min_edge, min_odds, max_odds

    def find_value_bets(self, df: pl.DataFrame,
                        prob_col: str = "prob_A_wins",
                        odds_A_col: str = "odds_A",
                        odds_B_col: str = "odds_B") -> pl.DataFrame:
        """
        Annotate every match with its edges and value-bet flags.

        Returns:
            The input frame with the columns ``edge_A``, ``edge_B``,
            ``is_value_bet_A``, ``is_value_bet_B`` and ``is_value_bet`` added.
        """
        model_prob_a = pl.col(prob_col)
        book_odds_a = pl.col(odds_A_col)
        book_odds_b = pl.col(odds_B_col)

        def qualifies(edge_name: str, odds_expr: pl.Expr) -> pl.Expr:
            # Enough edge AND odds inside the accepted range.
            return (
                (pl.col(edge_name) >= self.min_edge) &
                (odds_expr >= self.min_odds) &
                (odds_expr <= self.max_odds)
            )

        # Stage 1: edge of each side against the odds-implied probability.
        with_edges = df.with_columns([
            (model_prob_a - 1 / book_odds_a).alias("edge_A"),
            ((1 - model_prob_a) - 1 / book_odds_b).alias("edge_B"),
        ])

        # Stage 2: per-side flags (they read the edge columns created above).
        with_flags = with_edges.with_columns([
            qualifies("edge_A", book_odds_a).alias("is_value_bet_A"),
            qualifies("edge_B", book_odds_b).alias("is_value_bet_B"),
        ])

        # Stage 3: a match is a value bet when either side is.
        either_side = pl.col("is_value_bet_A") | pl.col("is_value_bet_B")
        return with_flags.with_columns([either_side.alias("is_value_bet")])

    def get_value_bet_stats(self, df: pl.DataFrame) -> Dict:
        """Summarise the value bets flagged by ``find_value_bets``."""
        n_matches = len(df)

        side_a = df.filter(pl.col("is_value_bet_A"))
        side_b = df.filter(pl.col("is_value_bet_B"))
        n_side_a = side_a.shape[0]
        n_side_b = side_b.shape[0]
        n_any = df.filter(pl.col("is_value_bet")).shape[0]

        stats = {
            "total_matches": n_matches,
            "value_bets_on_A": n_side_a,
            "value_bets_on_B": n_side_b,
            "total_value_bets": n_any,
            "value_bet_rate": n_any / n_matches if n_matches > 0 else 0,
        }
        # Average edge over the flagged rows only; 0 when a side has none.
        stats["avg_edge_A"] = side_a["edge_A"].mean() if n_side_a > 0 else 0
        stats["avg_edge_B"] = side_b["edge_B"].mean() if n_side_b > 0 else 0
        return stats


# ===============================================
# VISUALIZATION
# ===============================================

def plot_backtest_results(results: BacktestResults, output_path: Path):
    """
    Draw the four-panel backtest chart and save it to ``output_path``.

    Panels: bankroll evolution, monthly profit, drawdown in percent and the
    distribution of stake sizes.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    (bankroll_ax, monthly_ax), (drawdown_ax, stake_ax) = axes
    title_style = dict(fontsize=12, fontweight='bold')

    # 1. Bankroll evolution, with the starting bankroll as a reference line
    bankroll_ax.plot(results.bankroll_history, 'b-', linewidth=1.5)
    bankroll_ax.axhline(y=results.initial_bankroll, color='r', linestyle='--',
                        alpha=0.5, label='Initial')
    bankroll_ax.set_title('Bankroll Evolution', **title_style)
    bankroll_ax.set_xlabel('Bet Number')
    bankroll_ax.set_ylabel('Bankroll ($)')
    bankroll_ax.legend()

    # 2. Monthly returns: green bars for profit, red bars for loss
    if results.monthly_returns:
        month_labels = [*results.monthly_returns]
        month_profits = [*results.monthly_returns.values()]
        positions = range(len(month_labels))
        bar_colors = ['green' if profit >= 0 else 'red' for profit in month_profits]
        monthly_ax.bar(positions, month_profits, color=bar_colors, alpha=0.7)
        monthly_ax.set_xticks(positions)
        monthly_ax.set_xticklabels(month_labels, rotation=45, ha='right')
        monthly_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
    monthly_ax.set_title('Monthly Returns', **title_style)
    monthly_ax.set_ylabel('Profit ($)')

    # 3. Drawdown: distance below the running peak, in percent of that peak
    running_peak = results.initial_bankroll
    drawdown_pct = []
    for value in results.bankroll_history:
        running_peak = max(running_peak, value)
        drawdown_pct.append((running_peak - value) / running_peak * 100)
    drawdown_ax.fill_between(range(len(drawdown_pct)), drawdown_pct, color='red', alpha=0.3)
    drawdown_ax.plot(drawdown_pct, 'r-', linewidth=1)
    drawdown_ax.set_title('Drawdown (%)', **title_style)
    drawdown_ax.set_xlabel('Bet Number')
    drawdown_ax.set_ylabel('Drawdown %')

    # 4. Bet size distribution
    stake_sizes = [record.stake for record in results.bet_records]
    if stake_sizes:
        stake_ax.hist(stake_sizes, bins=30, color='blue', alpha=0.7, edgecolor='black')
    stake_ax.set_title('Bet Size Distribution', **title_style)
    stake_ax.set_xlabel('Stake ($)')
    stake_ax.set_ylabel('Frequency')

    # Same light grid on every panel.
    for panel in (bankroll_ax, monthly_ax, drawdown_ax, stake_ax):
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()

    print(f"  üìä Chart saved: {output_path}")


# ===============================================
# MAIN PROCESSING
# ===============================================

def load_data() -> pl.DataFrame:
    """
    Load the matches to backtest, with their predictions and odds.

    The Monte Carlo predictions file is preferred; the test set is used as a
    fallback. Raises FileNotFoundError when neither file exists.
    """
    print("\n[1/5] Loading data...")

    monte_carlo_file = MC_DIR.joinpath("monte_carlo_predictions.parquet")
    if monte_carlo_file.exists():
        # Preferred source: Monte Carlo predictions
        predictions = pl.read_parquet(monte_carlo_file)
        print(f"  Loaded Monte Carlo predictions: {len(predictions):,} matches")
        return predictions

    fallback_file = DATA_DIR.joinpath("test.parquet")
    if not fallback_file.exists():
        raise FileNotFoundError("No prediction data found!")

    # Fallback source: the test set
    predictions = pl.read_parquet(fallback_file)
    print(f"  Loaded test set: {len(predictions):,} matches")
    return predictions


def _first_present(candidates, columns):
    """Return the first name of ``candidates`` found in ``columns``, else None."""
    return next((name for name in candidates if name in columns), None)


def _resolve_odds_column(df: pl.DataFrame, side: str):
    """
    Locate (or derive) the decimal-odds column for one side ("A" or "B").

    A decimal ``odds_<side>`` column is used as is. Otherwise an
    ``odds_implied_prob_<side>`` column is inverted into a new
    ``odds_<side>_decimal`` column.

    Returns:
        ``(df, column_name)``; ``column_name`` is None when neither exists.
    """
    decimal_col = f"odds_{side}"
    if decimal_col in df.columns:
        return df, decimal_col

    implied_col = f"odds_implied_prob_{side}"
    if implied_col in df.columns:
        converted_col = f"odds_{side}_decimal"
        df = df.with_columns([(1 / pl.col(implied_col)).alias(converted_col)])
        return df, converted_col

    return df, None


def prepare_backtest_data(
    df: pl.DataFrame,
) -> Tuple[pl.DataFrame, str, str, str, Optional[str], Optional[str]]:
    """
    Identify the columns the backtest needs and derive missing odds.

    Each side's odds are resolved independently, so a decimal column on one
    side and an implied-probability column on the other are both handled
    correctly. When no odds exist at all, synthetic odds are generated from
    the model probability with a 5% margin.

    Returns:
        ``(df, prob_col, odds_A_col, odds_B_col, target_col, date_col)``.
        ``target_col`` and ``date_col`` are None when no matching column
        exists.

    Raises:
        ValueError: If no probability column is found, or if odds are
            available for only one of the two players.
    """
    print("\n[2/5] Preparing backtest data...")

    # Probability column
    prob_col = _first_present(
        ["mc_p_A_wins", "prob_A_wins", "prediction", "p_A"], df.columns
    )
    if prob_col is None:
        raise ValueError("No probability column found!")
    print(f"  Using probability: {prob_col}")

    # Odds columns, one side at a time
    df, odds_A_col = _resolve_odds_column(df, "A")
    df, odds_B_col = _resolve_odds_column(df, "B")

    if (odds_A_col is None) != (odds_B_col is None):
        # A one-sided market cannot be backtested, and generating synthetic
        # odds here would overwrite the real column that does exist.
        raise ValueError(
            f"Odds found for only one player (A: {odds_A_col}, B: {odds_B_col}); "
            "both sides are required"
        )

    if odds_A_col is None:
        print("  ‚ö†Ô∏è No odds columns found, generating synthetic odds...")
        # Synthetic odds: fair odds from the clipped probability, times 0.95.
        df = df.with_columns([
            (1 / pl.col(prob_col).clip(0.1, 0.9) * 0.95).alias("odds_A"),
            (1 / (1 - pl.col(prob_col).clip(0.1, 0.9)) * 0.95).alias("odds_B"),
        ])
        odds_A_col = "odds_A"
        odds_B_col = "odds_B"

    print(f"  Using odds: {odds_A_col}, {odds_B_col}")

    # Target column (actual winner)
    target_col = _first_present(["target_A_wins", "target", "winner"], df.columns)
    if target_col:
        print(f"  Using target: {target_col}")
    else:
        # NOTE(review): run_backtest skips every match that has no target, so
        # no bet is placed in this case - confirm the message wording.
        print("  ‚ö†Ô∏è No target column found, backtest will use predictions only")

    # Date column
    date_col = _first_present(["tourney_date_ta", "date", "match_date"], df.columns)

    return df, prob_col, odds_A_col, odds_B_col, target_col, date_col


def run_backtest(df: pl.DataFrame, prob_col: str, odds_A_col: str, 
                 odds_B_col: str, target_col: str, date_col: str) -> BacktestResults:
    """
    Run the full backtest over ``df`` and return the compiled results.

    Matches are replayed in date order when ``date_col`` is given. Rows that
    lack a probability, either odds value or the target are skipped.
    """
    print("\n[3/5] Running backtest...")

    # Engine configured from the module-level betting parameters
    engine = BacktestEngine(
        initial_bankroll=INITIAL_BANKROLL,
        kelly_calculator=KellyCalculator(
            kelly_fraction=KELLY_FRACTION,
            min_edge=MIN_EDGE,
            max_bet_fraction=MAX_BET_FRACTION,
            min_odds=MIN_ODDS,
            max_odds=MAX_ODDS
        ),
        commission=COMMISSION
    )

    # Chronological order when a date column is available
    ordered = df.sort(date_col) if date_col else df
    rows = ordered.to_dicts()
    n_rows = len(rows)

    bets_placed = 0
    for idx, row in enumerate(rows):
        # Progress report every 10,000 rows
        if idx > 0 and idx % 10000 == 0:
            print(f"  Processed {idx:,} / {n_rows:,} matches, {bets_placed:,} bets placed")

        p_A = row.get(prob_col)
        odds_A = row.get(odds_A_col)
        odds_B = row.get(odds_B_col)
        target = row.get(target_col) if target_col else None

        # A row is unusable as soon as one required value is missing
        if any(value is None for value in (p_A, odds_A, odds_B, target)):
            continue

        when = row.get(date_col) if date_col else datetime.now()

        # target == 1 means player A won; the name columns are mapped accordingly
        a_won = target == 1
        if a_won:
            actual_winner = "A"
            name_key_A, name_key_B = "winner_name", "loser_name"
        else:
            actual_winner = "B"
            name_key_A, name_key_B = "loser_name", "winner_name"

        placed = engine.place_bet(
            match_id=str(row.get("custom_match_id", f"match_{idx}")),
            date=when,
            player_A=str(row.get(name_key_A, "Player A")),
            player_B=str(row.get(name_key_B, "Player B")),
            p_A_wins=float(p_A),
            odds_A=float(odds_A),
            odds_B=float(odds_B),
            actual_winner=actual_winner
        )
        if placed is not None:
            bets_placed += 1

    print(f"  Total bets placed: {bets_placed:,}")

    return engine.get_results()


def analyze_results(results: BacktestResults) -> Dict:
    """
    Build the detailed, JSON-friendly analysis of a backtest.

    Returns:
        Dict with the sections ``summary``, ``risk_metrics`` and
        ``betting_stats``, plus ``edge_analysis`` and ``win_rate_by_edge``
        when at least one bet was placed. Percentages are expressed in
        percent (already multiplied by 100). ``profit_factor`` is None when
        it is not finite (wins but no losing bet).
    """
    print("\n[4/5] Analyzing results...")

    # An infinite profit factor would be written to JSON as the non-standard
    # token ``Infinity``; None serialises to a valid ``null`` instead.
    profit_factor = (
        round(results.profit_factor, 3)
        if np.isfinite(results.profit_factor)
        else None
    )

    analysis = {
        "summary": {
            "initial_bankroll": results.initial_bankroll,
            "final_bankroll": round(results.final_bankroll, 2),
            "total_profit": round(results.total_profit, 2),
            "roi": round(results.roi * 100, 2),
            "total_bets": results.total_bets,
            "winning_bets": results.winning_bets,
            "losing_bets": results.losing_bets,
            "win_rate": round(results.win_rate * 100, 2),
        },
        "risk_metrics": {
            "max_drawdown": round(results.max_drawdown, 2),
            "max_drawdown_pct": round(results.max_drawdown_pct * 100, 2),
            "sharpe_ratio": round(results.sharpe_ratio, 3),
            "profit_factor": profit_factor,
        },
        "betting_stats": {
            "avg_odds": round(results.avg_odds, 3),
            "avg_stake": round(results.avg_stake, 2),
            "total_staked": round(results.total_staked, 2),
        },
    }

    # Edge analysis
    if results.bet_records:
        edges = [b.edge for b in results.bet_records]
        analysis["edge_analysis"] = {
            "avg_edge": round(np.mean(edges) * 100, 2),
            "min_edge": round(np.min(edges) * 100, 2),
            "max_edge": round(np.max(edges) * 100, 2),
        }

        # Win rate by edge bucket. The first bucket takes every edge below 5%,
        # whatever the configured minimum edge is.
        edge_buckets = {"2-5%": [], "5-10%": [], "10%+": []}
        for b in results.bet_records:
            won = 1 if b.result == "W" else 0
            if b.edge < 0.05:
                edge_buckets["2-5%"].append(won)
            elif b.edge < 0.10:
                edge_buckets["5-10%"].append(won)
            else:
                edge_buckets["10%+"].append(won)

        analysis["win_rate_by_edge"] = {
            k: round(np.mean(v) * 100, 1) if v else 0
            for k, v in edge_buckets.items()
        }

    return analysis


def main():
    """
    Run the complete Kelly betting pipeline.

    Steps: load the data, resolve the columns, flag value bets, run the
    backtest, analyse it, then write the analysis JSON, the bet records
    (parquet) and the charts to OUTPUT_DIR and print a final summary.

    Returns:
        ``(results, analysis)``: the BacktestResults and the analysis dict.
    """
    t0 = datetime.now()

    # Load data
    df = load_data()

    # Prepare
    df, prob_col, odds_A_col, odds_B_col, target_col, date_col = prepare_backtest_data(df)

    # Find value bets
    finder = ValueBetFinder(min_edge=MIN_EDGE)
    df = finder.find_value_bets(df, prob_col, odds_A_col, odds_B_col)
    vb_stats = finder.get_value_bet_stats(df)

    print(f"\n  Value bet stats:")
    print(f"    Total matches: {vb_stats['total_matches']:,}")
    print(f"    Value bets found: {vb_stats['total_value_bets']:,} ({vb_stats['value_bet_rate']:.1%})")
    print(f"    Avg edge on A: {vb_stats['avg_edge_A']:.2%}")
    print(f"    Avg edge on B: {vb_stats['avg_edge_B']:.2%}")

    # Run backtest
    results = run_backtest(df, prob_col, odds_A_col, odds_B_col, target_col, date_col)

    # Analyze
    analysis = analyze_results(results)

    # Save results
    print("\n[5/5] Saving results...")

    # Save analysis JSON (explicit encoding so the file does not depend on the
    # platform's default code page)
    analysis_path = OUTPUT_DIR / "backtest_analysis.json"
    with open(analysis_path, "w", encoding="utf-8") as f:
        json.dump(analysis, f, indent=2, default=str)
    print(f"  ‚úÖ Analysis saved: {analysis_path}")

    # Save bet records
    if results.bet_records:
        records_df = pl.DataFrame([
            {
                "match_id": b.match_id,
                "date": str(b.date),
                "bet_on": b.bet_on,
                "p_win": b.p_win,
                "odds": b.odds,
                "stake": b.stake,
                "edge": b.edge,
                "result": b.result,
                "profit": b.profit,
                "bankroll_after": b.bankroll_after,
            }
            for b in results.bet_records
        ])
        records_path = OUTPUT_DIR / "bet_records.parquet"
        records_df.write_parquet(records_path)
        print(f"  ‚úÖ Bet records saved: {records_path}")

    # Generate charts
    chart_path = OUTPUT_DIR / "backtest_charts.png"
    plot_backtest_results(results, chart_path)

    elapsed = (datetime.now() - t0).total_seconds()

    # Print final summary
    print("\n" + "=" * 70)
    print("   üèÜ PP_20 KELLY BETTING BACKTEST COMPLETE!")
    print("=" * 70)
    print(f"""
üìä R√âSULTATS FINAUX:

   üí∞ PERFORMANCE:
      Initial Bankroll:  ${results.initial_bankroll:,.2f}
      Final Bankroll:    ${results.final_bankroll:,.2f}
      Total Profit:      ${results.total_profit:,.2f}
      ROI:               {results.roi*100:.2f}%
   
   üìà STATISTIQUES:
      Total Bets:        {results.total_bets:,}
      Win Rate:          {results.win_rate*100:.1f}%
      Avg Odds:          {results.avg_odds:.2f}
      Avg Stake:         ${results.avg_stake:.2f}
   
   ‚ö†Ô∏è RISQUE:
      Max Drawdown:      ${results.max_drawdown:.2f} ({results.max_drawdown_pct*100:.1f}%)
      Sharpe Ratio:      {results.sharpe_ratio:.3f}
      Profit Factor:     {results.profit_factor:.2f}
   
   ‚è±Ô∏è Time: {elapsed:.1f}s
   üìÅ Output: {OUTPUT_DIR}
""")

    print("=" * 70)
    print("   ‚úÖ PIPELINE GOD SOTA 2026 COMPLET!")
    print("=" * 70)
    # The parameter summary is rendered from the configuration constants so it
    # cannot drift from the values the backtest actually used.
    print(f"""
üìã FICHIERS G√âN√âR√âS:
   ‚Ä¢ backtest_analysis.json  : Analyse d√©taill√©e
   ‚Ä¢ bet_records.parquet     : Historique des paris
   ‚Ä¢ backtest_charts.png     : Graphiques de performance

üéØ PARAM√àTRES UTILIS√âS:
   ‚Ä¢ Kelly Fraction: {KELLY_FRACTION * 100:g}% (conservateur)
   ‚Ä¢ Min Edge: {MIN_EDGE * 100:g}%
   ‚Ä¢ Max Bet: {MAX_BET_FRACTION * 100:g}% bankroll
   ‚Ä¢ Commission: {COMMISSION * 100:g}%

üí° RECOMMANDATIONS:
   1. Si Sharpe > 1.5: Strat√©gie viable
   2. Si Max DD < 20%: Risque acceptable
   3. Si ROI > 5%: Performance excellente
   4. Ajuster Kelly Fraction selon tol√©rance au risque
""")

    return results, analysis


# Script entry point: run the pipeline only when this file is executed
# directly, binding the returned results and analysis to module-level names.
if __name__ == "__main__":
    results, analysis = main()