In [1]:
# === CapBot v1c – EV-Based Smart Betting (Train Once, Predict on 100%) ===
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from datetime import datetime
from pathlib import Path
import os

# === Config ===
# Anchor every path to the folder holding this script. When no __file__ is
# defined (e.g. an interactive session) fall back to the resolved working dir.
if "__file__" in globals():
    notebook_dir = Path(__file__).parent
else:
    notebook_dir = Path().resolve()

version = "v1c"
version_dir = notebook_dir
today = f"{datetime.today():%Y%m%d}"  # YYYYMMDD stamp used in the report name

# Output artifacts are written directly into the version directory.
summary_xlsx_path = version_dir.joinpath(f"CapBot_{version}_Report_{today}.xlsx")
readme_path = version_dir.joinpath(f"CapBot_{version}_README.md")

# === Load Data ===
# The processed match history lives three directories above the version folder.
file_path = version_dir / "../../../data/historical/processed/matches_2015_2025_combined_balanced.csv"
df = pd.read_csv(file_path)

# Feature columns: anything non-numeric is coerced to NaN, then every NaN is
# replaced by that column's mean.
numeric_cols = ['rank_A', 'rank_B', 'pts_A', 'pts_B', 'odds_A', 'odds_B']
coerced = df[numeric_cols].apply(pd.to_numeric, errors='coerce')
df[numeric_cols] = coerced.fillna(coerced.mean())

# NOTE(review): after the mean fill, a numeric column can only still contain
# NaN if it was entirely NaN, so in practice this drops rows lacking a
# winner_code. Imputed odds_A values also flow into the EV and profit
# calculations further down - confirm that is intended.
required_cols = numeric_cols + ['winner_code']
df = df.dropna(subset=required_cols)

# === Train Once and Predict on Full Dataset ===
y = df['winner_code']
X = df[numeric_cols]

# Fit on a stratified 80% sample; the 20% remainder is discarded here.
X_train, _, y_train, _ = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=42,
)

model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Score every row, including the rows the model was fitted on, so metrics
# derived from these predictions are largely in-sample.
# Column 1 is the probability of the second class in model.classes_
# (presumably winner_code == 1 - confirm the label encoding).
class_probabilities = model.predict_proba(X)
preds_proba = class_probabilities[:, 1]
is_positive = preds_proba > 0.5
preds = is_positive.astype(int)

# Per-match results table: one row per row of `df`, sharing df's index.
full_df = df.copy()
full_df["pred_proba"] = preds_proba
full_df["predicted"] = preds
full_df["correct"] = (preds == y).astype(int)
full_df["bet"] = 200  # fixed bet amount recorded on every row
full_df["target"] = y.values

# Metadata columns the EV report below depends on. full_df is a copy of df, so
# these columns are already present and already on the correct rows.
# They must NOT be re-assigned from df[meta_cols].reset_index(drop=True):
# pandas aligns that assignment on index labels, and once dropna() has removed
# any row the labels are no longer 0..n-1, so dates/players/odds would land on
# the wrong matches (and become NaN where no matching label exists).
meta_cols = ['date', 'player_A', 'player_B', 'odds_A']
missing_meta = [col for col in meta_cols if col not in full_df.columns]
if missing_meta:
    # Same exception type the former df[meta_cols] lookup raised.
    raise KeyError(f"Required columns missing from input data: {missing_meta}")

# === Calculate Expected Value ===
# EV per unit bet on player A: predicted probability times odds_A, minus the bet.
full_df["ev"] = full_df["pred_proba"] * full_df["odds_A"] - 1

# === Apply EV Filter ===
# Keep only rows whose EV is at least 0.05 (a 5%+ edge).
edge_mask = full_df["ev"] >= 0.05
ev_bets = full_df[edge_mask].copy()

report_cols = ["date", "player_A", "player_B", "odds_A", "pred_proba", "ev", "target"]
ev_bets = ev_bets[report_cols]

# Settle each bet of 200: a row with target == 1 pays 200 * (odds_A - 1),
# any other row loses the full 200.
bet_won = ev_bets["target"] == 1
payout_if_won = 200 * (ev_bets["odds_A"] - 1)
ev_bets["profit"] = np.where(bet_won, payout_if_won, -200)

# === Summarize ===
total_bets = len(ev_bets)
total_profit = ev_bets["profit"].sum()

if total_bets:
    # ROI in percent, relative to the total amount bet (200 per bet).
    roi = (total_profit / (200 * total_bets)) * 100
    # Share of selected rows where the 0.5-threshold prediction equals target.
    # NOTE(review): profit assumes every selected bet backs player A, whereas
    # this metric also counts a correct "not 1" prediction as a hit - confirm
    # that this is the intended meaning of "Bet Accuracy".
    thresholded = (ev_bets['pred_proba'] > 0.5).astype(int)
    bet_accuracy = (ev_bets['target'] == thresholded).mean()
else:
    # No qualifying bets: report zeros instead of dividing by zero.
    roi = 0
    bet_accuracy = 0

# Accuracy of the thresholded predictions over the full dataset.
accuracy = accuracy_score(y, preds)

# === Save Report ===
# Single-row summary that goes on the "Summary" sheet.
summary_row = {
    "Run": "Single",
    "Accuracy": round(accuracy, 4),
    "Total Bets": total_bets,
    "Profit ($)": round(total_profit, 2),
    "ROI (%)": round(roi, 2),
    "Bet Accuracy": round(bet_accuracy, 4),
}
summary_df = pd.DataFrame([summary_row])

# One workbook, two sheets: the individual EV bets and the run summary.
with pd.ExcelWriter(summary_xlsx_path, engine="xlsxwriter") as writer:
    ev_bets.to_excel(writer, sheet_name="EV_Bets", index=False)
    summary_df.to_excel(writer, sheet_name="Summary", index=False)

# === Generate README ===
# Markdown summary of the run; the metrics are interpolated from the values
# computed above. The leading backslash suppresses an initial blank line.
readme = f"""\
# 📄 CapBot {version.upper()} – EV Betting Strategy

**Date:** {today}  
**Excel Report:** `{summary_xlsx_path.name}`

---

## 🎯 Strategy
- Train logistic regression model on 80% of dataset
- Predict probabilities on all matches (100%)
- Calculate **Expected Value**: `EV = (pred_proba × odds_A) - 1`
- Keep bets with **EV ≥ 5%**

---

## 📊 Performance Summary
- **Model Accuracy:** {round(accuracy, 4)}
- **Total Bets (EV ≥ 5%):** {total_bets}
- **Bet Accuracy:** {round(bet_accuracy, 4)}
- **Total Profit:** ${round(total_profit, 2)}
- **ROI:** {round(roi, 2)}%

---

## ✅ Notes
- This version filters strictly based on **positive expected value**
- Odds and probabilities are both required to justify each pick
- Suitable for real-world betting where value beats volume

---
"""

# Explicit UTF-8 so the emoji and symbols in the template are written
# regardless of the platform's default encoding.
readme_path.write_text(readme, encoding="utf-8")

# Report where both artifacts ended up, as absolute paths.
readme_location = readme_path.resolve()
report_location = summary_xlsx_path.resolve()
print(f"📄 README saved to {readme_location}")
print(f"📊 XLSX Report saved to {report_location}")


  df = pd.read_csv(file_path)


📄 README saved to /Users/boroni_4/Documents/CapBot/capbot/notebooks/versions/v1c/CapBot_v1c_README.md
📊 XLSX Report saved to /Users/boroni_4/Documents/CapBot/capbot/notebooks/versions/v1c/CapBot_v1c_Report_20250516.xlsx
