In [1]:
%load_ext autoreload
%autoreload 2
%autoreload 2

In [2]:
from degen_sim.common.notebook_utils import hide_raw_cells, markdown

In [3]:
# Project helper imported from degen_sim.common.notebook_utils; presumably hides
# raw cells in the rendered notebook — TODO confirm against its implementation.
hide_raw_cells()

In [4]:
import numpy as np
import pandas as pd
import plotly.express as px

from datetime import datetime
from dateutil.tz import gettz
import os
import time

from dataclasses import dataclass, asdict

In [5]:
from degen_sim.common.constants import ROOT_DIR, DATA_DIR

In [6]:
def line_scatter(df, x, y, **kwargs):
    """
    Draw a Plotly Express line chart whose traces show both markers and lines.

    Parameters:
        df: Data passed straight through to ``px.line``.
        x: Column (or values) for the x axis.
        y: Column (or values) for the y axis.
        **kwargs: Extra keyword arguments forwarded to ``px.line``. ``template``
            defaults to ``"plotly_white"`` but may be overridden by the caller.

    Returns:
        The figure returned by ``px.line`` with every trace set to
        ``mode="markers+lines"``.
    """
    # Use a default instead of a hard-coded keyword so that passing
    # template=... no longer raises a duplicate-keyword TypeError.
    kwargs.setdefault("template", "plotly_white")
    fig = px.line(df, x=x, y=y, **kwargs)
    for trace in fig.data:
        trace.update(mode="markers+lines")
    return fig

In [19]:
# Locate the two parlay-tracker CSV files under DATA_DIR, then read each one
# into its own DataFrame (NFL first, then CFB).
nfl_csv_path = os.path.join(DATA_DIR, "parlay_tracker_nfl.csv")
cfb_csv_path = os.path.join(DATA_DIR, "parlay_tracker_cfb.csv")

nfl_picks = pd.read_csv(nfl_csv_path)
cfb_picks = pd.read_csv(cfb_csv_path)

In [20]:
# Keep only graded picks: rows whose "Win" value is missing (presumably games
# not yet decided — confirm against the tracker files) would otherwise trip the
# Y/N/P count assertion in get_pick_infos. The same filter is applied to both
# leagues so they are treated consistently.
nfl_picks = nfl_picks.dropna(subset=["Win"])
cfb_picks = cfb_picks.dropna(subset=["Win"])

In [22]:
# Sorted list of every distinct picker that appears in either league's table.
pickers = sorted(set(nfl_picks["Pick"]).union(set(cfb_picks["Pick"])))

In [23]:
# Methodology section: a heading followed by two explanatory paragraphs, each
# emitted through the project's markdown() helper in the same order as before.
methodology_heading = "## Methodology"
methodology_approach = "For each person, we have their realized record (W-L-P), and set of American odds. We first convert the American odds to implied probabilities (assuming no house edge), and then we run a Monte Carlo simulation to see the distribution of number of wins according to the implied probabilities."
methodology_scoring = "From this distribution, we can then compute the CDF(W) where W is the realized number of wins, where a higher CDF indicates more skill in picking winning bets, and a lower CDF indicates lower skill in picking winning bets. We simulate using 1,000,000 Monte Carlo trials per person."

markdown(methodology_heading)
markdown(methodology_approach)
markdown(methodology_scoring)

## Methodology

For each person, we have their realized record (W-L-P), and set of American odds. We first convert the American odds to implied probabilities (assuming no house edge), and then we run a Monte Carlo simulation to see the distribution of number of wins according to the implied probabilities.

From this distribution, we can then compute the CDF(W) where W is the realized number of wins, where a higher CDF indicates more skill in picking winning bets, and a lower CDF indicates lower skill in picking winning bets. We simulate using 1,000,000 Monte Carlo trials per person.

In [24]:
@dataclass
class PickInfo:
    """
    One picker's realized record together with a per-pick win probability.

    Attributes:
        name: Picker identifier (the value taken from the "Pick" column).
        num_wins: Number of picks whose "Win" value is "Y".
        num_losses: Number of picks whose "Win" value is "N".
        num_pushes: Number of picks whose "Win" value is "P".
        odds: One entry per pick. Despite the field name, get_pick_infos
            stores the implied win probability here (a fraction between 0
            and 1), not the raw American odds.
    """
    name: str
    num_wins: int
    num_losses: int
    num_pushes: int
    odds: list[float]

In [25]:
def american_to_implied_prob(odds):
    """
    Convert American odds to an implied win probability.

    No adjustment is made for the bookmaker's margin; the raw odds are
    converted directly.

    Parameters:
        odds (int or float): American odds (e.g., +150, -120)

    Returns:
        float: Implied probability as a fraction between 0 and 1
        (e.g., 0.4 for +150, about 0.545 for -120).

    Raises:
        ValueError: If odds is zero, or is NaN (neither positive nor negative).
    """
    if odds > 0:
        # Underdog: risk 100 to win `odds`.
        return 100 / (odds + 100)
    if odds < 0:
        # Favorite: risk |odds| to win 100.
        stake = abs(odds)
        return stake / (stake + 100)
    if odds == 0:
        raise ValueError("Odds cannot be zero")
    # Only NaN fails all three comparisons; report it accurately instead of
    # claiming the odds were zero.
    raise ValueError(f"Odds must be a non-zero number, got {odds!r}")


In [26]:
def get_pick_infos(pickers, picks):
    """
    Summarize each picker's rows of a picks table as a PickInfo.

    Parameters:
        pickers: Iterable of picker names to look up in the "Pick" column.
        picks: DataFrame with "Pick", "Odds" and "Win" columns, where "Win"
            is expected to be "Y", "N" or "P" on every row.

    Returns:
        list[PickInfo]: One entry per picker, in the order given. The `odds`
        field holds the implied probability of each of that picker's picks.

    Raises:
        AssertionError: If any of a picker's rows has a "Win" value other
            than "Y", "N" or "P".
    """
    summaries = []
    for picker in pickers:
        picker_rows = picks.loc[picks["Pick"] == picker]

        # Convert the odds first so a bad odds value surfaces before the
        # outcome-count check, as in the original ordering.
        implied_probs = [american_to_implied_prob(line) for line in picker_rows["Odds"]]

        outcomes = picker_rows["Win"]
        wins = int((outcomes == "Y").sum())
        losses = int((outcomes == "N").sum())
        pushes = int((outcomes == "P").sum())
        assert wins + losses + pushes == len(picker_rows)

        summaries.append(
            PickInfo(
                name=picker,
                num_wins=wins,
                num_losses=losses,
                num_pushes=pushes,
                odds=implied_probs,
            )
        )
    return summaries

In [27]:
def simulate_pick_info(pick_info, num_trials=100, seed=0):
    """
    Monte Carlo simulation of how many picks a picker would win.

    Every pick (wins, losses and pushes all count as a game) is resolved
    independently in each trial: it is a win when a uniform draw on [0, 1) is
    less than or equal to that pick's probability in `pick_info.odds`.
    Simulated games only resolve as win or not-win; pushes are not simulated.

    Parameters:
        pick_info: Object with `num_wins`, `num_losses`, `num_pushes` and
            `odds` (one win probability per game, as a list or array).
        num_trials (int): Number of simulated trials.
        seed: Seed for the random stream. A private
            `numpy.random.RandomState` is used, so the global NumPy RNG is
            left untouched while the draws match what
            `np.random.seed(seed)` followed by `np.random.uniform` produces.

    Returns:
        pandas.DataFrame: One row per trial with columns `trial_index`
        (0..num_trials-1) and `win` (total wins in that trial, as float).
        A picker with zero games gets `num_trials` rows of 0.0 wins.

    Raises:
        ValueError: If the number of probabilities does not match the number
            of games.
    """
    num_games = pick_info.num_wins + pick_info.num_losses + pick_info.num_pushes

    # Accept lists and arrays alike (the previous list-repetition trick broke
    # silently-different semantics for ndarrays).
    win_probs = np.asarray(pick_info.odds, dtype=float)
    if win_probs.shape != (num_games,):
        raise ValueError(
            f"Expected {num_games} win probabilities, got shape {win_probs.shape}"
        )

    rng = np.random.RandomState(seed)
    # Row t holds the draws for trial t; C-order filling keeps draw
    # t * num_games + i paired with game i, exactly like a flat array.
    draws = rng.uniform(low=0.0, high=1.0, size=(num_trials, num_games))
    wins_per_trial = (draws <= win_probs).sum(axis=1).astype(float)

    return pd.DataFrame(
        {
            "trial_index": np.arange(num_trials, dtype=np.int64),
            "win": wins_per_trial,
        }
    )

In [28]:
def compute_cdf(pick_infos, num_trials=1_000_000, seed=0):
    """
    Score each picker by where their realized win count falls in the
    simulated distribution of wins.

    For every PickInfo the function runs `simulate_pick_info` and computes the
    fraction of trials whose simulated win total is less than or equal to the
    realized `num_wins`.

    Parameters:
        pick_infos: Sequence of PickInfo dataclass instances.
        num_trials (int): Monte Carlo trials per picker (default 1,000,000).
        seed: Seed forwarded to `simulate_pick_info` for every picker.

    Returns:
        pandas.DataFrame: One row per picker with the PickInfo fields except
        `odds`, plus a `cdf` column, sorted by `cdf` descending. `cdf` is NaN
        when the simulation produced no trials for a picker.
    """
    cdfs = []
    for pick_info in pick_infos:
        result = simulate_pick_info(pick_info, num_trials=num_trials, seed=seed)
        if len(result) == 0:
            # No simulated trials (e.g. a picker with no games in this
            # table): report NaN instead of dividing by zero.
            cdfs.append(float("nan"))
            continue
        trials_at_or_below = int((result["win"] <= pick_info.num_wins).sum())
        cdfs.append(trials_at_or_below / len(result))

    cdf_df = pd.DataFrame([asdict(pick_info) for pick_info in pick_infos])
    cdf_df["cdf"] = cdfs
    # errors="ignore" keeps an empty `pick_infos` from raising on the missing
    # "odds" column.
    return cdf_df.sort_values(by="cdf", ascending=False).drop(columns="odds", errors="ignore")

In [39]:
# Section heading for the combined-league results, emitted via the project's markdown() helper.
markdown("## Combined (NFL + CFB) Results")

## Combined (NFL + CFB) Results

In [36]:
# Stack the NFL and CFB pick tables row-wise. The original row indices are
# kept, so index labels may repeat across the two sources.
combined_picks = pd.concat([nfl_picks, cfb_picks])

In [37]:
# One PickInfo per picker, built from that picker's rows across both leagues.
combined_pick_infos = get_pick_infos(pickers, combined_picks)

In [38]:
# Rank pickers by the simulated share of trials with at most their realized
# number of wins (highest first); the returned table is displayed as the cell output.
compute_cdf(combined_pick_infos)

Unnamed: 0,name,num_wins,num_losses,num_pushes,cdf
6,Kunal,5,3,0,0.819242
1,Arun,4,4,0,0.585172
2,Ben,4,4,0,0.577093
9,Ryan,4,4,0,0.540625
5,Daniel,3,5,0,0.290385
8,Michael,3,5,0,0.25365
7,Kyle,3,5,0,0.218466
0,Ananya,2,5,1,0.181041
4,Cristian,3,5,0,0.163229
3,Caleb,1,7,0,0.026593


In [31]:
# Section heading for the NFL-only results, emitted via the project's markdown() helper.
markdown("## NFL Results")

## NFL Results

In [29]:
# `pickers` is the union of names from both leagues, so anyone without rows in
# nfl_picks gets a PickInfo with zero games here.
nfl_pick_infos = get_pick_infos(pickers, nfl_picks)

In [30]:
# NFL-only ranking: share of simulated trials with at most the realized number
# of wins, sorted highest first.
compute_cdf(nfl_pick_infos)

Unnamed: 0,name,num_wins,num_losses,num_pushes,cdf
1,Arun,3,1,0,0.92461
6,Kunal,3,1,0,0.918674
0,Ananya,2,2,0,0.672756
2,Ben,2,2,0,0.64616
9,Ryan,2,2,0,0.592147
4,Cristian,2,2,0,0.508063
3,Caleb,1,3,0,0.286183
5,Daniel,1,3,0,0.239734
8,Michael,1,3,0,0.227499
7,Kyle,1,3,0,0.20134


In [32]:
# Section heading for the CFB-only results, emitted via the project's markdown() helper.
markdown("## CFB Results")

## CFB Results

In [33]:
# `pickers` is the union of names from both leagues, so anyone without rows in
# cfb_picks gets a PickInfo with zero games here.
cfb_pick_infos = get_pick_infos(pickers, cfb_picks)

In [34]:
# CFB-only ranking: share of simulated trials with at most the realized number
# of wins, sorted highest first.
compute_cdf(cfb_pick_infos)

Unnamed: 0,name,num_wins,num_losses,num_pushes,cdf
6,Kunal,2,2,0,0.661999
5,Daniel,2,2,0,0.6574
9,Ryan,2,2,0,0.649522
2,Ben,2,2,0,0.646482
8,Michael,2,2,0,0.623972
7,Kyle,2,2,0,0.585098
1,Arun,1,3,0,0.279409
4,Cristian,1,3,0,0.177469
0,Ananya,0,3,1,0.094309
3,Caleb,0,4,0,0.050774
