In [7]:
# === CapBot v1d – EV + Kelly Stake Sizing ===
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from datetime import datetime
from pathlib import Path
import os

# === Config ===
notebook_dir = Path(__file__).parent if "__file__" in globals() else Path().resolve()
today = datetime.today().strftime("%Y%m%d")
version = "v1d"
version_dir = notebook_dir

summary_xlsx_path = version_dir / f"CapBot_{version}_Report_{today}.xlsx"
readme_path = version_dir / f"CapBot_{version}_README.md"

# === Load Data ===
file_path = version_dir / "../../../data/historical/processed/matches_2015_2025_combined_balanced.csv"
df = pd.read_csv(file_path)
numeric_cols = ['rank_A', 'rank_B', 'pts_A', 'pts_B', 'odds_A', 'odds_B']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
df[numeric_cols] = df[numeric_cols].fillna(df[numeric_cols].mean())
df = df.dropna(subset=numeric_cols + ['winner_code'])

# === Train Once and Predict on Full Dataset ===
X = df[numeric_cols]
y = df['winner_code']
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.20, stratify=y, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

preds_proba = model.predict_proba(X)[:, 1]
preds = (preds_proba > 0.5).astype(int)

full_df = df.copy()
full_df["pred_proba"] = preds_proba
full_df["predicted"] = preds
full_df["correct"] = (preds == y).astype(int)
full_df["target"] = y.values

meta_cols = ['date', 'player_A', 'player_B', 'odds_A']
full_df[meta_cols] = df[meta_cols].reset_index(drop=True)

# === Calculate EV & Kelly ===
full_df["ev"] = full_df["pred_proba"] * full_df["odds_A"] - 1

# Kelly formula: f* = (bp - q) / b, where b = odds - 1, p = pred_proba, q = 1 - p
b = full_df["odds_A"] - 1
p = full_df["pred_proba"]
q = 1 - p
full_df["kelly_fraction"] = (b * p - q) / b
full_df["kelly_fraction"] = full_df["kelly_fraction"].clip(lower=0, upper=1)  # Limit between 0% and 100%

# === Apply EV Filter ===
ev_bets = full_df[full_df["ev"] >= 0.05].copy()
ev_bets = ev_bets[["date", "player_A", "player_B", "odds_A", "pred_proba", "ev", "kelly_fraction", "target"]]
ev_bets["stake"] = ev_bets["kelly_fraction"] * 200  # base stake = $200 unit

ev_bets["profit"] = np.where(
    ev_bets["target"] == 1,
    ev_bets["stake"] * (ev_bets["odds_A"] - 1),
    -ev_bets["stake"]
)

# === Summarize ===
total_bets = len(ev_bets)
total_profit = ev_bets["profit"].sum()
total_staked = ev_bets["stake"].sum()
roi = (total_profit / total_staked) * 100 if total_staked else 0
bet_accuracy = (ev_bets['target'] == (ev_bets['pred_proba'] > 0.5).astype(int)).mean() if total_bets else 0
accuracy = accuracy_score(y, preds)

# === Save Report ===
with pd.ExcelWriter(summary_xlsx_path, engine="xlsxwriter") as writer:
    ev_bets.to_excel(writer, sheet_name="EV_Kelly_Bets", index=False)
    pd.DataFrame([{
        "Run": "Single",
        "Accuracy": round(accuracy, 4),
        "Total Bets": total_bets,
        "Total Staked ($)": round(total_staked, 2),
        "Profit ($)": round(total_profit, 2),
        "ROI (%)": round(roi, 2),
        "Bet Accuracy": round(bet_accuracy, 4)
    }]).to_excel(writer, sheet_name="Summary", index=False)
    
    
#using this as model - to be exported to model dir
import joblib
from pathlib import Path

# Go from /notebooks/versions/ to /capbot/
project_root = Path().resolve().parents[2]
model_dir = project_root / "model"
model_dir.mkdir(parents=True, exist_ok=True)

model_path = model_dir / "CapBot_v1d_model.pkl"
joblib.dump(model, model_path)

print(f"✅ Model saved to: {model_path.resolve()}")

# === Generate README ===
readme = f"""\
# 📄 CapBot {version.upper()} – EV + Kelly Betting Strategy

**Date:** {today}  
**Excel Report:** `{summary_xlsx_path.name}`

---

## 🎯 Strategy
- Train logistic regression on 80% of dataset
- Predict on all matches (100%)
- Calculate:
  - **Expected Value** = (pred_proba × odds_A) - 1
  - **Kelly Fraction** = (b × p - q) / b, clipped to [0, 1]
- Stake = Kelly % × $200 base unit
- Only bet where **EV ≥ 5%**

---

## 📊 Performance Summary
- **Model Accuracy:** {round(accuracy, 4)}
- **EV Bets:** {total_bets} matches
- **Total Staked:** ${round(total_staked, 2)}
- **Profit:** ${round(total_profit, 2)}
- **ROI:** {round(roi, 2)}%
- **Bet Accuracy:** {round(bet_accuracy, 4)}

---

## ✅ Notes
- Bets are dynamically sized based on confidence + odds
- EV ensures edge; Kelly controls risk
- This strategy balances ROI + bankroll growth

---
"""

with open(readme_path, "w", encoding="utf-8") as f:
    f.write(readme)

print(f"📄 README saved to {readme_path.resolve()}")
print(f"📊 XLSX Report saved to {summary_xlsx_path.resolve()}")


  df = pd.read_csv(file_path)


✅ Model saved to: /Users/boroni_4/Documents/CapBot/capbot/model/CapBot_v1d_model.pkl
📄 README saved to /Users/boroni_4/Documents/CapBot/capbot/notebooks/versions/v1d/CapBot_v1d_README.md
📊 XLSX Report saved to /Users/boroni_4/Documents/CapBot/capbot/notebooks/versions/v1d/CapBot_v1d_Report_20250517.xlsx
