In [49]:
%load_ext autoreload
%autoreload 2
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
from degen_sim.common.notebook_utils import hide_raw_cells, markdown

In [3]:
hide_raw_cells()

In [51]:
import numpy as np
import pandas as pd
import plotly.express as px

from datetime import datetime
from dateutil.tz import gettz
import os
import time

from dataclasses import dataclass, asdict

In [74]:
from degen_sim.common.constants import ROOT_DIR, DATA_DIR, REPORTS_DIR

In [53]:
def line_scatter(df, x, y, **kwargs):
    """
    Build a Plotly Express line chart that also draws a marker on every point.

    Parameters:
        df: Data passed straight through to ``px.line``.
        x: x-axis column/values, as accepted by ``px.line``.
        y: y-axis column(s)/values, as accepted by ``px.line``.
        **kwargs: Extra keyword arguments forwarded to ``px.line``. ``template``
            defaults to "plotly_white" but can be overridden here.

    Returns:
        The figure returned by ``px.line`` with every trace's mode set to
        "markers+lines".
    """
    # Use setdefault instead of a hard-coded template=... so that a caller who
    # passes template=... no longer hits "got multiple values for keyword
    # argument 'template'". kwargs is a fresh dict per call, so mutating it is safe.
    kwargs.setdefault("template", "plotly_white")
    fig = px.line(df, x=x, y=y, **kwargs)
    for trace in fig.data:
        trace.update(mode="markers+lines")
    return fig

In [73]:
DATE = "20251007"

In [54]:
nfl_picks = pd.read_csv(os.path.join(DATA_DIR, "parlay_tracker_nfl.csv"))
cfb_picks = pd.read_csv(os.path.join(DATA_DIR, "parlay_tracker_cfb.csv"))

In [55]:
nfl_picks = nfl_picks[~nfl_picks.Win.isna()]

In [56]:
pickers = sorted(set(nfl_picks["Pick"]) | set(cfb_picks["Pick"]))

In [57]:
markdown("## Methodology")
markdown("For each person, we have their realized record (W-L-P), and set of American odds. We first convert the American odds to implied probabilities (assuming no house edge), and then we run a Monte Carlo simulation to see the distribution of number of wins according to the implied probabilities.")
markdown("From this distribution, we can then compute the CDF(W) where W is the realized number of wins, where a higher CDF indicates more skill in picking winning bets, and a lower CDF indicates lower skill in picking winning bets. We simulate using 1,000,000 Monte Carlo trials per person.")

## Methodology

For each person, we have their realized record (W-L-P), and set of American odds. We first convert the American odds to implied probabilities (assuming no house edge), and then we run a Monte Carlo simulation to see the distribution of number of wins according to the implied probabilities.

From this distribution, we can then compute the CDF(W) where W is the realized number of wins, where a higher CDF indicates more skill in picking winning bets, and a lower CDF indicates lower skill in picking winning bets. We simulate using 1,000,000 Monte Carlo trials per person.

In [58]:
@dataclass
class PickInfo:
    """
    One picker's realized record together with the per-pick win probabilities
    used to simulate that record.
    """
    # Picker identifier (the value matched against the "Pick" column).
    name: str
    # Number of picks recorded as a win ("Y").
    num_wins: int
    # Number of picks recorded as a loss ("N").
    num_losses: int
    # Number of picks recorded as a push ("P").
    num_pushes: int
    # One entry per pick. Despite the field name, get_pick_infos stores implied
    # win probabilities here (American odds already converted), not raw odds.
    odds: list[float]

In [59]:
def american_to_implied_prob(odds):
    """
    Convert American odds to the implied win probability (no vig removed).

    Parameters:
        odds (int or float): American odds (e.g., +150, -120)

    Returns:
        float: Implied probability as a fraction between 0 and 1
        (e.g., 0.4 for +150, about 0.545 for -120). It is NOT a percentage.

    Raises:
        ValueError: If odds is zero, or is a value such as NaN that is neither
            positive nor negative.
    """
    if odds > 0:
        # Underdog: risk 100 to win `odds`.
        return 100 / (odds + 100)
    if odds < 0:
        # Favourite: risk `-odds` to win 100.
        stake = abs(odds)
        return stake / (stake + 100)
    if odds == 0:
        raise ValueError("Odds cannot be zero")
    # Reached only for values that compare false against everything (NaN);
    # report them as such instead of claiming the odds were zero.
    raise ValueError(f"Odds must be a non-zero number, got {odds!r}")


In [60]:
def get_pick_infos(pickers, picks):
    """
    Build one PickInfo per picker from a picks table.

    Parameters:
        pickers: Iterable of picker names to summarise, in output order.
        picks: DataFrame with "Pick", "Odds" and "Win" columns, where "Win"
            is "Y" (win), "N" (loss) or "P" (push).

    Returns:
        list[PickInfo]: One entry per picker. Each entry's ``odds`` holds the
        implied probabilities converted from that picker's "Odds" values.
    """
    def _tally(outcomes, label):
        # Number of rows whose outcome equals `label`, as a plain int.
        return int((outcomes == label).sum())

    summaries = []
    for picker in pickers:
        rows = picks[picks.Pick == picker]
        implied_probs = [american_to_implied_prob(line) for line in rows["Odds"]]

        outcomes = rows["Win"]
        wins = _tally(outcomes, "Y")
        losses = _tally(outcomes, "N")
        pushes = _tally(outcomes, "P")
        # Every row must carry one of the three recognised outcomes.
        assert wins + losses + pushes == len(rows)

        summaries.append(
            PickInfo(
                name=picker,
                num_wins=wins,
                num_losses=losses,
                num_pushes=pushes,
                odds=implied_probs,
            )
        )
    return summaries

In [61]:
def simulate_pick_info(pick_info, num_trials=100, seed=0):
    """
    Monte Carlo simulate how many picks a picker would win by chance alone.

    Each trial replays all of the picker's games once; a game counts as a win
    when a uniform [0, 1) draw is <= that game's implied probability.

    Parameters:
        pick_info: Object exposing ``num_wins``, ``num_losses``, ``num_pushes``
            and ``odds`` (one implied probability per game), e.g. a PickInfo.
        num_trials (int): Number of simulated replays.
        seed (int): Seed for the random draws; the same seed gives the same result.

    Returns:
        pandas.DataFrame: One row per trial with columns ``trial_index``
        (0..num_trials-1) and ``win`` (float count of simulated wins). A picker
        with zero games gets ``num_trials`` rows of 0.0 wins.

    Raises:
        ValueError: If ``len(pick_info.odds)`` does not equal the number of games.
    """
    num_games = pick_info.num_wins + pick_info.num_losses + pick_info.num_pushes
    probs = np.asarray(pick_info.odds, dtype=float)
    if probs.shape != (num_games,):
        raise ValueError(
            f"Expected {num_games} implied probabilities, got shape {probs.shape}"
        )

    # A private RandomState yields the same stream as np.random.seed(seed) +
    # np.random.uniform, but does not reseed the process-wide generator.
    rng = np.random.RandomState(seed)

    # Row = trial, column = game. The C-order fill matches drawing one flat vector
    # of num_games * num_trials values and cutting it into consecutive trials.
    draws = rng.uniform(low=0.0, high=1.0, size=(num_trials, num_games))
    wins_per_trial = (draws <= probs).sum(axis=1).astype(float)

    return pd.DataFrame(
        {
            "trial_index": np.arange(num_trials, dtype=np.int64),
            "win": wins_per_trial,
        }
    )

In [62]:
def compute_cdf(pick_infos, num_trials=1_000_000, seed=0):
    """
    Score each picker by the simulated CDF evaluated at their realized win count.

    For every picker the record is simulated with ``simulate_pick_info`` and the
    CDF is the fraction of trials whose simulated win count is <= the picker's
    realized ``num_wins``.

    Parameters:
        pick_infos: Iterable of PickInfo objects.
        num_trials (int): Monte Carlo trials per picker (default 1,000,000).
        seed (int): Seed forwarded to every simulation (default 0).

    Returns:
        pandas.DataFrame: One row per picker with the PickInfo fields except
        ``odds``, plus a ``cdf`` column, sorted by ``cdf`` descending. A picker
        whose simulation produced no trials gets NaN (sorted last). An empty
        input yields an empty DataFrame.
    """
    records = []
    for pick_info in pick_infos:
        result = simulate_pick_info(pick_info, num_trials=num_trials, seed=seed)
        if len(result) == 0:
            # Nothing was simulated (e.g. a picker with no games); report
            # "undefined" instead of dividing by zero.
            cdf = float("nan")
        else:
            cdf = int((result["win"] <= pick_info.num_wins).sum()) / len(result)

        record = asdict(pick_info)
        # The per-game probabilities are an input, not part of the summary table.
        record.pop("odds", None)
        record["cdf"] = cdf
        records.append(record)

    cdf_df = pd.DataFrame(records)
    if cdf_df.empty:
        # No pickers: there is no "cdf" column to sort on.
        return cdf_df
    return cdf_df.sort_values(by="cdf", ascending=False)

In [63]:
markdown("## Combined (NFL + CFB) Results")

## Combined (NFL + CFB) Results

In [64]:
combined_picks = pd.concat([nfl_picks, cfb_picks])

In [65]:
combined_pick_infos = get_pick_infos(pickers, combined_picks)

In [66]:
combined_cdfs = compute_cdf(combined_pick_infos)

In [67]:
markdown("## NFL Results")

## NFL Results

In [68]:
nfl_pick_infos = get_pick_infos(pickers, nfl_picks)

In [69]:
nfl_cdfs = compute_cdf(nfl_pick_infos)

In [70]:
markdown("## CFB Results")

## CFB Results

In [71]:
cfb_pick_infos = get_pick_infos(pickers, cfb_picks)

In [72]:
cfb_cdfs = compute_cdf(cfb_pick_infos)

In [79]:
# Assemble the whole report in memory first, so that a failure while rendering a
# table cannot leave a half-written report file behind.
report_blocks = [
    f"# Degen Sim Report for {DATE}",
    "## Methodology",
    "For each person, we have their realized record (W-L-P), and set of American odds. We first convert the American odds to implied probabilities (assuming no house edge), and then we run a Monte Carlo simulation to see the distribution of number of wins according to the implied probabilities.",
    "From this distribution, we can then compute the CDF(W) where W is the realized number of wins, where a higher CDF indicates more skill in picking winning bets, and a lower CDF indicates lower skill in picking winning bets. We simulate using 1,000,000 Monte Carlo trials per person.",
    "## Combined (NFL + CFB) Results",
    combined_cdfs.to_markdown(index=False),
    "## NFL Results",
    nfl_cdfs.to_markdown(index=False),
    "## CFB Results",
    cfb_cdfs.to_markdown(index=False),
]

# Make sure the output directory exists on a fresh checkout.
os.makedirs(REPORTS_DIR, exist_ok=True)

with open(os.path.join(REPORTS_DIR, f"degen_sim_{DATE}.md"), "w", encoding="utf-8") as f:
    # Blank lines between blocks keep the two methodology paragraphs separate and
    # cleanly terminate each table before the next heading when rendered.
    f.write("\n\n".join(report_blocks) + "\n")