In [None]:
import pandas as pd
import numpy as np
import joblib
import warnings

warnings.filterwarnings('ignore')

print("=======================================================")
print("   ü§ñ QUANTITATIVE BETTING PORTFOLIO MANAGER v1.0      ")
print("=======================================================")

# ==============================================================================
# 1. USER CONFIGURATION (EDIT THIS PART)
# ==============================================================================
BANKROLL = 100.00        # Total betting budget ($)
RISK_MODE = "Sensible"    # Options: "Aggressive" (0.5), "Sensible" (0.25), "Conservative" (0.1)
MIN_EDGE = 0.05           # Minimum 5% ROI required to place a bet
MAX_STAKE_PCT = 0.06      # Max 6% of bankroll on a single match (Risk management)
MIN_PROBABILITY = 0.25    # Ignore outcomes with <20% chance (Reduces variance)

# --- MANUAL FIXTURE LIST (Team Names must match your CSV data) ---
fixtures = [
    # Format: {"Home": "Team A", "Away": "Team B", "Odds_1": HomeOdds, "Odds_X": DrawOdds, "Odds_2": AwayOdds}
    {"Home": "Manchester United", "Away": "Newcastle United", "Odds_1": 2.53, "Odds_X": 3.61, "Odds_2": 2.68},
    {"Home": "Nottingham Forest", "Away": "Manchester City", "Odds_1": 5.29, "Odds_X": 4.45, "Odds_2": 1.58},
    {"Home": "Arsenal",           "Away": "Brighton & Hove Albion", "Odds_1": 1.40, "Odds_X": 4.92, "Odds_2": 7.85},
    {"Home": "Brentford",         "Away": "Bournemouth",      "Odds_1": 2.29, "Odds_X": 3.60, "Odds_2": 3.03},
    {"Home": "Burnley",           "Away": "Everton",          "Odds_1": 4.01, "Odds_X": 3.35, "Odds_2": 2.01},
    {"Home": "Liverpool",         "Away": "Wolverhampton Wanderers", "Odds_1": 1.24, "Odds_X": 6.59, "Odds_2": 11.28},
    {"Home": "West Ham United",   "Away": "Fulham",           "Odds_1": 2.68, "Odds_X": 3.41, "Odds_2": 2.63},
    {"Home": "Chelsea",           "Away": "Aston Villa",      "Odds_1": 1.85, "Odds_X": 3.88, "Odds_2": 4.03},
    {"Home": "Sunderland",        "Away": "Leeds United",     "Odds_1": 2.58, "Odds_X": 3.23, "Odds_2": 2.86},
    {"Home": "Crystal Palace",    "Away": "Tottenham Hotspur", "Odds_1": 2.29, "Odds_X": 3.30, "Odds_2": 3.25},
]

# ==============================================================================
# 2. SYSTEM BACKEND (DO NOT TOUCH)
# ==============================================================================

# Risk Multiplier Logic
kelly_fractions = {"Aggressive": 0.5, "Sensible": 0.25, "Conservative": 0.1}
KELLY_F = kelly_fractions.get(RISK_MODE, 0.25)

try:
    artifacts = joblib.load('football_model_final.pkl')
    model = artifacts['model']
    features = artifacts['features']
    current_elos = artifacts['elo_dict']
    df_recent = artifacts['df_recent']
    print(f"‚úÖ Model Loaded. Analyzing {len(fixtures)} upcoming matches...")
except FileNotFoundError:
    print("‚ùå Critical Error: 'football_model_final.pkl' missing.")
    exit()

def get_stats(team):
    # Find latest data point for the team
    rows = df_recent[(df_recent['home_team_name'] == team) | (df_recent['away_team_name'] == team)]
    if len(rows) == 0: return None
    last = rows.sort_values('date').iloc[-1]
    
    prefix = 'home_' if last['home_team_name'] == team else 'away_'
    stats = {}
    
    # Extract rolling features
    roll_feats = ['team_xg', 'team_possession', 'shots_onTarget', 'corners', 'team_points','fouls']
    for f in roll_feats:
        col = f"{prefix}roll_{f}"
        if col in last: stats[f] = last[col]
        else: stats[f] = 0
    return stats

# ==============================================================================
# 3. ANALYSIS LOOP
# ==============================================================================
opportunities = []

for f in fixtures:
    h_team, a_team = f['Home'], f['Away']
    
    # 1. Fetch Data
    h_stats = get_stats(h_team)
    a_stats = get_stats(a_team)
    h_elo = current_elos.get(h_team, 1500)
    a_elo = current_elos.get(a_team, 1500)
    
    if not h_stats or not a_stats:
        print(f"‚ö†Ô∏è  Missing data for {h_team} vs {a_team}. Skipping.")
        continue

    # 2. Build Feature Vector (Exact match to training)
    input_data = {
        'diff_elo': (h_elo + 65) - a_elo,
        'home_elo': h_elo,
        'away_elo': a_elo,
        'diff_rest': 0, # Neutral assumption
        'home_roll_team_xg': h_stats['team_xg'],
        'away_roll_team_xg': a_stats['team_xg']
    }
    
    roll_feats = ['team_xg', 'team_possession', 'shots_onTarget', 'corners', 'team_points']
    for feat in roll_feats:
        input_data[f"diff_{feat}"] = h_stats[feat] - a_stats[feat]

    # Predict
    input_df = pd.DataFrame([input_data]).reindex(columns=features, fill_value=0)
    probs = model.predict_proba(input_df)[0]
    
    # Probabilities: [Away, Draw, Home] (Standard sklearn order for 0,1,2 classes)
    # Check your model encoding! Assuming: 0=Away, 1=Draw, 2=Home based on previous code
    p_away, p_draw, p_home = probs[0], probs[1], probs[2]
    
    # 3. Find Value (Compare vs Odds)
    bets_for_match = []
    
    # Option 1: Home
    edge_home = (p_home * f['Odds_1']) - 1
    if edge_home > MIN_EDGE and p_home > MIN_PROBABILITY:
        bets_for_match.append({'Type': 'HOME', 'Team': h_team, 'Odds': f['Odds_1'], 'Prob': p_home, 'Edge': edge_home})
        
    # Option 2: Draw
    edge_draw = (p_draw * f['Odds_X']) - 1
    if edge_draw > MIN_EDGE and p_draw > 0.25:
        bets_for_match.append({'Type': 'DRAW', 'Team': 'Draw', 'Odds': f['Odds_X'], 'Prob': p_draw, 'Edge': edge_draw})
        
    # Option 3: Away
    edge_away = (p_away * f['Odds_2']) - 1
    if edge_away > MIN_EDGE and p_away > MIN_PROBABILITY:
        bets_for_match.append({'Type': 'AWAY', 'Team': a_team, 'Odds': f['Odds_2'], 'Prob': p_away, 'Edge': edge_away})
    
    # 4. Pick Best Bet for this match (Avoid conflicting bets)
    if bets_for_match:
        # Sort by Edge and pick the best one
        best_bet = sorted(bets_for_match, key=lambda x: x['Edge'], reverse=True)[0]
        best_bet['Match'] = f"{h_team} vs {a_team}"
        opportunities.append(best_bet)

# ==============================================================================
# 4. PORTFOLIO ALLOCATION (KELLY CRITERION)
# ==============================================================================
print("\n‚öôÔ∏è Calculating Optimal Stakes (Kelly Criterion)...")

portfolio = pd.DataFrame(opportunities)

if not portfolio.empty:
    def calculate_stake(row):
        # Kelly Formula: f = (bp - q) / b
        # b = odds - 1
        b = row['Odds'] - 1
        p = row['Prob']
        q = 1 - p
        
        f = ((b * p) - q) / b
        
        # Apply Safety Multipliers
        f_safe = f * KELLY_F
        
        # Apply Max Cap
        return max(0, min(f_safe, MAX_STAKE_PCT))

    portfolio['Kelly_Pct'] = portfolio.apply(calculate_stake, axis=1)
    portfolio['Stake'] = portfolio['Kelly_Pct'] * BANKROLL
    portfolio['Est_Return'] = portfolio['Stake'] * (portfolio['Odds'] - 1)
    
    # Filter out zero stakes (if any negative edges slipped through)
    portfolio = portfolio[portfolio['Stake'] > 0.5].sort_values(by='Stake', ascending=False)
    
    total_invested = 0
    total_ev = 0
    
    # 2. Rename columns for clean display
    display_df = portfolio.copy()
    display_df = display_df[['Match', 'Type', 'Team', 'Odds', 'Prob', 'Edge', 'Stake', 'Est_Return']]
    display_df.columns = ['Match', 'Bet Type', 'Team Selection', 'Odds', 'Win Prob', 'Edge (ROI)', 'Stake ($)', 'Pot. Profit ($)']
    
    # 3. Apply Professional Quant Styling
    styled_table = (display_df.style
        .format({
            'Odds': '{:.2f}',
            'Win Prob': '{:.1%}',
            'Edge (ROI)': '{:.1%}',
            'Stake ($)': '${:.2f}',
            'Pot. Profit ($)': '${:.2f}'
        })
        # Green background for higher Probability (Confidence)
        .background_gradient(subset=['Win Prob'], cmap='Greens', vmin=0.2, vmax=0.8)
        
        # Blue background for Edge (Value)
        .background_gradient(subset=['Edge (ROI)'], cmap='Blues', vmin=0.05, vmax=0.5)
        
        # Data Bars for Stake (Visualizing Money Allocation)
        .bar(subset=['Stake ($)'], color='#d65f5f', vmin=0)
        
        # Bold the Team Name
        .set_properties(subset=['Team Selection'], **{'font-weight': 'bold'})
        
        .set_caption("üöÄ OPTIMIZED QUANT PORTFOLIO")
        .set_table_styles([
            {'selector': 'th', 'props': [('font-size', '12px'), ('text-align', 'center'), ('background-color', '#f4f4f4')]},
            {'selector': 'td', 'props': [('text-align', 'center'), ('padding', '8px')]},
            {'selector': 'caption', 'props': [('font-size', '16px'), ('font-weight', 'bold'), ('color', '#333')]}
        ])
    )
    
    # 4. Display
    from IPython.display import display
    display(styled_table)

    # 5. Print Summary Footer (Plain text is better for totals)
    total_inv = portfolio['Stake'].sum()
    print(f"\nüìä PORTFOLIO SUMMARY")
    print(f"üí∞ Total Risk:     ${total_inv:.2f}")
    print(f"üíµ Bankroll Left:  ${BANKROLL - total_inv:.2f}")

else:
    print("\nüìâ No Value Bets Found.")
    print("The model thinks the bookies have priced all these games correctly (or you need lower thresholds).")

   ü§ñ QUANTITATIVE BETTING PORTFOLIO MANAGER v1.0      
‚úÖ Model Loaded. Analyzing 10 upcoming matches...

‚öôÔ∏è Calculating Optimal Stakes (Kelly Criterion)...


Unnamed: 0,Match,Bet Type,Team Selection,Odds,Win Prob,Edge (ROI),Stake ($),Pot. Profit ($)
4,Chelsea vs Aston Villa,AWAY,Aston Villa,4.03,48.2%,94.4%,$6.00,$18.18
2,Liverpool vs Wolverhampton Wanderers,AWAY,Wolverhampton Wanderers,11.28,25.8%,190.5%,$4.63,$47.62
1,Arsenal vs Brighton & Hove Albion,AWAY,Brighton & Hove Albion,7.85,25.8%,102.2%,$3.73,$25.54
0,Manchester United vs Newcastle United,AWAY,Newcastle United,2.68,45.9%,23.1%,$3.43,$5.76
3,West Ham United vs Fulham,AWAY,Fulham,2.63,46.2%,21.4%,$3.28,$5.35
6,Crystal Palace vs Tottenham Hotspur,AWAY,Tottenham Hotspur,3.25,37.7%,22.4%,$2.49,$5.61
5,Sunderland vs Leeds United,AWAY,Leeds United,2.86,36.9%,5.5%,$0.74,$1.37



üìä PORTFOLIO SUMMARY
üí∞ Total Risk:     $24.30
üíµ Bankroll Left:  $75.70


In [20]:
# ==========================================
# MATCH EXPLAINER (Why did the model bet?)
# ==========================================

def explain_match(home_team, away_team):
    print(f"üïµÔ∏è DETECTIVE MODE: {home_team} vs {away_team}")
    
    # 1. Get Data (Reusing logic from engine)
    h_stats = get_stats(home_team)
    a_stats = get_stats(away_team)
    h_elo = current_elos.get(home_team, 1500)
    a_elo = current_elos.get(away_team, 1500)
    
    if not h_stats or not a_stats:
        print("‚ùå Data missing for one of the teams.")
        return

    # 2. Build Comparison Table
    comparison_data = []
    
    # A. ELO Rating
    # Add Home Adv (+65) to Home ELO to show "Effective ELO"
    effective_h_elo = h_elo + 65
    elo_diff = effective_h_elo - a_elo
    adv = home_team if elo_diff > 0 else away_team
    
    comparison_data.append({
        'Feature': 'ELO Rating (Power)', 
        f'{home_team}': int(h_elo), 
        f'{away_team}': int(a_elo), 
        'Difference': f"{int(elo_diff)}",
        'Advantage': adv
    })
    
    # B. Rolling Stats
    # Key stats the model cares about
    keys = {
        'team_points': 'Form (Avg Points)',
        'team_xg': 'Attack (Avg xG)',
        'team_possession': 'Control (Possession)',
        'shots_onTarget': 'Danger (Shots on Target)',
        'corners': 'Pressure (Corners)',
        'fouls': 'Aggression (Fouls)'
    }
    
    for k, label in keys.items():
        h_val = h_stats.get(k, 0)
        a_val = a_stats.get(k, 0)
        diff = h_val - a_val
        
        # Determine who is better
        # For Fouls, usually "Less" is not necessarily better or worse, but let's assume higher stats = advantage for others
        if k == 'fouls':
            adv_team = "-"
        else:
            adv_team = home_team if h_val > a_val else away_team
            
        comparison_data.append({
            'Feature': label, 
            f'{home_team}': round(h_val, 2), 
            f'{away_team}': round(a_val, 2), 
            'Difference': round(diff, 2),
            'Advantage': adv_team
        })

    # 3. Create DataFrame
    comp_df = pd.DataFrame(comparison_data)
    
    # 4. Display with Styling
    def highlight_advantage(row):
        # Color the advantage column green/red logic could go here
        # For simplicity, we bold the advantage
        return ['' for _ in row]

    styler = (comp_df.style
        .set_caption(f"üìä STATISTICAL FACE-OFF: {home_team} vs {away_team}")
        .hide(axis="index")
        .set_table_styles([
            {'selector': 'th', 'props': [('background-color', '#404040'), ('color', 'white'), ('font-weight', 'bold')]},
            {'selector': 'td', 'props': [('text-align', 'center')]},
        ])
        .background_gradient(subset=['Difference'], cmap='coolwarm', vmin=-1, vmax=1)
    )
    
    display(styler)
    
    # 5. NARRATIVE EXPLANATION
    print("\nüß† MODEL REASONING:")
    
    # Check ELO
    if abs(elo_diff) < 50:
        print(f"1. ELO: Teams are rated very similarly (Diff {int(elo_diff)}).")
    elif elo_diff > 50:
        print(f"1. ELO: {home_team} is significantly stronger on paper.")
    else:
        print(f"1. ELO: {away_team} is rated higher despite playing away.")
        
    # Check Form (Points)
    p_diff = h_stats['team_points'] - a_stats['team_points']
    if p_diff < -0.5:
        print(f"2. FORM: {away_team} is in much better form ({a_stats['team_points']:.1f} pts/game vs {h_stats['team_points']:.1f}).")
        print(f"   -> This is likely why the model picked the Away win.")
    elif p_diff > 0.5:
        print(f"2. FORM: {home_team} is dominating recent games.")
    else:
        print(f"2. FORM: Both teams have similar recent results.")
        
    # Check xG
    xg_diff = h_stats['team_xg'] - a_stats['team_xg']
    if xg_diff < -0.3:
        print(f"3. PERFORMANCE: {away_team} is creating far better chances (xG gap: {xg_diff:.2f}).")
        print("   -> Even if they lost recently, the model sees they are playing well (Underlying Stats).")
    print("\n\n")

In [19]:
# ==========================================
# TEST IT ON YOUR BETS
# ==========================================
# Explain the specific bet you were curious about
for fixture in fixtures:
    explain_match(fixture['Home'], fixture['Away'])

üïµÔ∏è DETECTIVE MODE: Manchester United vs Newcastle United


Feature,Manchester United,Newcastle United,Difference,Advantage
ELO Rating (Power),1537.0,1559.0,43.0,Manchester United
Form (Avg Points),1.6,1.6,0.0,Newcastle United
Attack (Avg xG),2.42,1.52,0.9,Manchester United
Control (Possession),0.62,0.55,0.07,Manchester United
Danger (Shots on Target),6.2,4.8,1.4,Manchester United
Pressure (Corners),6.6,7.6,-1.0,Newcastle United
Aggression (Fouls),13.2,8.2,5.0,-



üß† MODEL REASONING:
1. ELO: Teams are rated very similarly (Diff 43).
2. FORM: Both teams have similar recent results.



üïµÔ∏è DETECTIVE MODE: Nottingham Forest vs Manchester City


Feature,Nottingham Forest,Manchester City,Difference,Advantage
ELO Rating (Power),1487.0,1618.0,-65.0,Manchester City
Form (Avg Points),1.8,3.0,-1.2,Manchester City
Attack (Avg xG),1.2,2.2,-1.0,Manchester City
Control (Possession),0.53,0.55,-0.02,Manchester City
Danger (Shots on Target),4.8,5.6,-0.8,Manchester City
Pressure (Corners),5.8,3.6,2.2,Nottingham Forest
Aggression (Fouls),12.0,9.4,2.6,-



üß† MODEL REASONING:
1. ELO: Manchester City is rated higher despite playing away.
2. FORM: Manchester City is in much better form (3.0 pts/game vs 1.8).
   -> This is likely why the model picked the Away win.
3. PERFORMANCE: Manchester City is creating far better chances (xG gap: -1.00).
   -> Even if they lost recently, the model sees they are playing well (Underlying Stats).



üïµÔ∏è DETECTIVE MODE: Arsenal vs Brighton & Hove Albion


Feature,Arsenal,Brighton & Hove Albion,Difference,Advantage
ELO Rating (Power),1677.0,1552.0,189.0,Arsenal
Form (Avg Points),2.4,1.4,1.0,Arsenal
Attack (Avg xG),1.92,1.74,0.18,Arsenal
Control (Possession),0.6,0.57,0.03,Arsenal
Danger (Shots on Target),5.4,4.2,1.2,Arsenal
Pressure (Corners),4.6,5.2,-0.6,Brighton & Hove Albion
Aggression (Fouls),7.2,11.4,-4.2,-



üß† MODEL REASONING:
1. ELO: Arsenal is significantly stronger on paper.
2. FORM: Arsenal is dominating recent games.



üïµÔ∏è DETECTIVE MODE: Brentford vs Bournemouth


Feature,Brentford,Bournemouth,Difference,Advantage
ELO Rating (Power),1529.0,1482.0,112.0,Brentford
Form (Avg Points),0.8,0.6,0.2,Brentford
Attack (Avg xG),1.0,1.9,-0.9,Bournemouth
Control (Possession),0.47,0.54,-0.08,Bournemouth
Danger (Shots on Target),2.0,6.2,-4.2,Bournemouth
Pressure (Corners),4.0,4.4,-0.4,Bournemouth
Aggression (Fouls),7.6,12.6,-5.0,-



üß† MODEL REASONING:
1. ELO: Brentford is significantly stronger on paper.
2. FORM: Both teams have similar recent results.
3. PERFORMANCE: Bournemouth is creating far better chances (xG gap: -0.90).
   -> Even if they lost recently, the model sees they are playing well (Underlying Stats).



üïµÔ∏è DETECTIVE MODE: Burnley vs Everton


Feature,Burnley,Everton,Difference,Advantage
ELO Rating (Power),1426.0,1498.0,-6.0,Everton
Form (Avg Points),0.0,1.8,-1.8,Everton
Attack (Avg xG),1.24,0.94,0.3,Burnley
Control (Possession),0.47,0.42,0.05,Burnley
Danger (Shots on Target),3.2,2.6,0.6,Burnley
Pressure (Corners),5.8,5.6,0.2,Burnley
Aggression (Fouls),9.6,11.0,-1.4,-



üß† MODEL REASONING:
1. ELO: Teams are rated very similarly (Diff -6).
2. FORM: Everton is in much better form (1.8 pts/game vs 0.0).
   -> This is likely why the model picked the Away win.



üïµÔ∏è DETECTIVE MODE: Liverpool vs Wolverhampton Wanderers


Feature,Liverpool,Wolverhampton Wanderers,Difference,Advantage
ELO Rating (Power),1517.0,1389.0,192.0,Liverpool
Form (Avg Points),2.2,0.0,2.2,Liverpool
Attack (Avg xG),1.54,0.8,0.74,Liverpool
Control (Possession),0.55,0.43,0.13,Liverpool
Danger (Shots on Target),4.8,2.0,2.8,Liverpool
Pressure (Corners),3.6,1.6,2.0,Liverpool
Aggression (Fouls),11.4,17.0,-5.6,-



üß† MODEL REASONING:
1. ELO: Liverpool is significantly stronger on paper.
2. FORM: Liverpool is dominating recent games.



üïµÔ∏è DETECTIVE MODE: West Ham United vs Fulham


Feature,West Ham United,Fulham,Difference,Advantage
ELO Rating (Power),1468.0,1493.0,39.0,West Ham United
Form (Avg Points),0.6,1.2,-0.6,Fulham
Attack (Avg xG),0.78,1.03,-0.25,Fulham
Control (Possession),0.35,0.49,-0.13,Fulham
Danger (Shots on Target),2.4,3.8,-1.4,Fulham
Pressure (Corners),5.2,4.8,0.4,West Ham United
Aggression (Fouls),11.4,7.4,4.0,-



üß† MODEL REASONING:
1. ELO: Teams are rated very similarly (Diff 39).
2. FORM: Fulham is in much better form (1.2 pts/game vs 0.6).
   -> This is likely why the model picked the Away win.



üïµÔ∏è DETECTIVE MODE: Chelsea vs Aston Villa


Feature,Chelsea,Aston Villa,Difference,Advantage
ELO Rating (Power),1552.0,1599.0,17.0,Chelsea
Form (Avg Points),1.4,3.0,-1.6,Aston Villa
Attack (Avg xG),1.34,1.3,0.04,Chelsea
Control (Possession),0.58,0.52,0.07,Chelsea
Danger (Shots on Target),4.2,5.8,-1.6,Aston Villa
Pressure (Corners),4.0,4.6,-0.6,Aston Villa
Aggression (Fouls),13.2,11.8,1.4,-



üß† MODEL REASONING:
1. ELO: Teams are rated very similarly (Diff 17).
2. FORM: Aston Villa is in much better form (3.0 pts/game vs 1.4).
   -> This is likely why the model picked the Away win.



üïµÔ∏è DETECTIVE MODE: Sunderland vs Leeds United


Feature,Sunderland,Leeds United,Difference,Advantage
ELO Rating (Power),1512.0,1467.0,109.0,Sunderland
Form (Avg Points),1.4,1.0,0.4,Sunderland
Attack (Avg xG),0.62,1.7,-1.08,Leeds United
Control (Possession),0.4,0.41,-0.01,Leeds United
Danger (Shots on Target),2.8,3.8,-1.0,Leeds United
Pressure (Corners),4.2,3.2,1.0,Sunderland
Aggression (Fouls),10.4,12.4,-2.0,-



üß† MODEL REASONING:
1. ELO: Sunderland is significantly stronger on paper.
2. FORM: Both teams have similar recent results.
3. PERFORMANCE: Leeds United is creating far better chances (xG gap: -1.08).
   -> Even if they lost recently, the model sees they are playing well (Underlying Stats).



üïµÔ∏è DETECTIVE MODE: Crystal Palace vs Tottenham Hotspur


Feature,Crystal Palace,Tottenham Hotspur,Difference,Advantage
ELO Rating (Power),1564.0,1542.0,86.0,Crystal Palace
Form (Avg Points),1.4,1.4,0.0,Tottenham Hotspur
Attack (Avg xG),1.98,1.36,0.62,Crystal Palace
Control (Possession),0.48,0.53,-0.05,Tottenham Hotspur
Danger (Shots on Target),5.4,3.8,1.6,Crystal Palace
Pressure (Corners),6.0,6.8,-0.8,Tottenham Hotspur
Aggression (Fouls),10.4,11.2,-0.8,-



üß† MODEL REASONING:
1. ELO: Crystal Palace is significantly stronger on paper.
2. FORM: Both teams have similar recent results.



