In [75]:
# Ensure notebook can import modules from src/
import sys
from pathlib import Path
import os

# If this notebook lives in /notebooks, jump to repo root
nb_path = Path.cwd().resolve()
repo_root = nb_path.parent if nb_path.name == "notebooks" else nb_path

# Try both strategies:
# 1) if your src/ is a package (has __init__.py) you can add repo root and do "from src import processing"
# 2) if src/ is a folder of modules, add src/ directly and import processing
src_dir = repo_root / "src"
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Basic imports used in the demo
import json
import pandas as pd
import numpy as np

# Import your modules (these names must match files inside src/)
import processing     # expects src/processing.py
import analysis       # expects src/analysis.py
# if you have ingestion.py, import it (optional)
try:
    import ingestion
except Exception:
    ingestion = None

print("Repo root:", repo_root)
print("src dir:", src_dir)


Repo root: /Users/davislaroque/player-prop-pricing
src dir: /Users/davislaroque/player-prop-pricing/src


In [76]:
# Loads live data if ODDS_API_KEY exists and ingestion.fetch_* is available,
# otherwise loads data/sample_odds.json for offline demo.

sample_path = repo_root / "data" / "sample_odds.json"
if os.getenv("ODDS_API_KEY") and ingestion is not None:
    # Replace with the actual ingestion function name if different
    print("Loading live data via ingestion (ODDS_API_KEY detected)...")
    try:
        data = ingestion.fetch_player_props(
            sport="basketball_nba",
            markets="h2h",
            regions="us",
            odds_format="decimal"
        )
    except Exception as e:
        print("Failed to fetch live data, falling back to sample. Error:", e)
        with open(sample_path, "r", encoding="utf-8") as f:
            data = json.load(f)
else:
    print("Loading sample data from:", sample_path)
    with open(sample_path, "r", encoding="utf-8") as f:
        data = json.load(f)

# Quick sanity show (first item)
print("Loaded items:", len(data))
if len(data) > 0:
    from pprint import pprint
    pprint({k: data[0].get(k) for k in ("sport_key","home_team","away_team","commence_time")})


Loading live data via ingestion (ODDS_API_KEY detected)...
Loaded items: 44
{'away_team': 'Houston Rockets',
 'commence_time': '2025-10-21T23:30:00Z',
 'home_team': 'Oklahoma City Thunder',
 'sport_key': 'basketball_nba'}


In [77]:
# Flatten TheOddsAPI-like JSON into tidy DataFrame
odds_df = processing.flatten_odds_to_df(data, market="h2h")
print("Flattened rows:", len(odds_df))
# Display first rows for recruiter
display(odds_df.head(10))


Flattened rows: 224


Unnamed: 0,game_id,sport,commence_time,home_team,away_team,bookmaker,last_update,market,outcome,price
0,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,DraftKings,2025-10-08T21:31:28Z,h2h,Houston Rockets,3.6
1,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,DraftKings,2025-10-08T21:31:28Z,h2h,Oklahoma City Thunder,1.31
2,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,FanDuel,2025-10-08T21:31:00Z,h2h,Houston Rockets,3.45
3,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,FanDuel,2025-10-08T21:31:00Z,h2h,Oklahoma City Thunder,1.33
4,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,MyBookie.ag,2025-10-08T21:31:54Z,h2h,Houston Rockets,3.42
5,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,MyBookie.ag,2025-10-08T21:31:54Z,h2h,Oklahoma City Thunder,1.3
6,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,Bovada,2025-10-08T21:30:55Z,h2h,Houston Rockets,3.55
7,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,Bovada,2025-10-08T21:30:55Z,h2h,Oklahoma City Thunder,1.32
8,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,BetRivers,2025-10-08T21:31:19Z,h2h,Houston Rockets,3.55
9,Oklahoma City Thunder_vs_Houston Rockets_2025-...,basketball_nba,2025-10-21T23:30:00Z,Oklahoma City Thunder,Houston Rockets,BetRivers,2025-10-08T21:31:19Z,h2h,Oklahoma City Thunder,1.3


In [78]:
# Converts raw sportsbook odds to implied probabilities, then de-vig them
processed_df = processing.odds_to_probs(odds_df, price_col="price", market_col="game_id")

# Show a concise table for recruiters: original price, decimal odds, implied, devigged
cols = ["game_id", "bookmaker", "outcome", "price", "decimal_odds", "implied_prob", "devig_prob"]
display(processed_df[cols].sort_values(["game_id","outcome","bookmaker"]).reset_index(drop=True))

# Quick check: devig_prob sums to ~1 per game_id
check = processed_df.groupby("game_id")["devig_prob"].sum().reset_index(name="devig_sum")
display(check.head())


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  Detect arbitrage opportunities across games for a given market.


Unnamed: 0,game_id,bookmaker,outcome,price,decimal_odds,implied_prob,devig_prob
0,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,DraftKings,Atlanta Hawks,1.51,1.51,0.662252,0.636145
1,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,DraftKings,Milwaukee Bucks,2.64,2.64,0.378788,0.363855
2,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,BetRivers,Atlanta Hawks,1.41,1.41,0.709220,0.169023
3,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,DraftKings,Atlanta Hawks,1.42,1.42,0.704225,0.167832
4,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,FanDuel,Atlanta Hawks,1.47,1.47,0.680272,0.162124
...,...,...,...,...,...,...,...
219,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,BetMGM,Utah Jazz,3.60,3.60,0.277778,0.053064
220,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,BetRivers,Utah Jazz,3.60,3.60,0.277778,0.053064
221,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,DraftKings,Utah Jazz,3.70,3.70,0.270270,0.051630
222,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,FanDuel,Utah Jazz,4.10,4.10,0.243902,0.046593


Unnamed: 0,game_id,devig_sum
0,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,1.0
1,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,1.0
2,Boston Celtics_vs_Detroit Pistons_2025-11-26T2...,1.0
3,Boston Celtics_vs_Philadelphia 76ers_2025-10-2...,1.0
4,Charlotte Hornets_vs_Brooklyn Nets_2025-10-22T...,1.0


In [79]:
# For best odds we want the best decimal odds per (game_id, outcome)
# Use decimal_odds as the price column for analysis.find_best_odds (or adapt if the function expects 'price')
df_for_analysis = processed_df.copy()
df_for_analysis["price"] = df_for_analysis["decimal_odds"]  # ensure the analysis helper sees decimal odds in 'price'

# Find best odds per outcome across books
best = analysis.find_best_odds(df_for_analysis)
display(best.sort_values(["game_id","outcome"]).reset_index(drop=True))


Unnamed: 0,game_id,sport,commence_time,home_team,away_team,bookmaker,last_update,market,outcome,price,decimal_odds,implied_prob,devig_prob
0,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,basketball_nba,2026-01-19T18:00:00Z,Atlanta Hawks,Milwaukee Bucks,DraftKings,2025-10-08T21:31:29Z,h2h,Atlanta Hawks,1.51,1.51,0.662252,0.636145
1,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,basketball_nba,2026-01-19T18:00:00Z,Atlanta Hawks,Milwaukee Bucks,DraftKings,2025-10-08T21:31:29Z,h2h,Milwaukee Bucks,2.64,2.64,0.378788,0.363855
2,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,basketball_nba,2025-10-22T23:40:00Z,Atlanta Hawks,Toronto Raptors,DraftKings,2025-10-08T21:31:29Z,h2h,Atlanta Hawks,1.42,1.42,0.704225,0.167832
3,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,basketball_nba,2025-10-22T23:40:00Z,Atlanta Hawks,Toronto Raptors,MyBookie.ag,2025-10-08T21:31:54Z,h2h,Atlanta Hawks,1.41,1.41,0.709220,0.169023
4,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,basketball_nba,2025-10-22T23:40:00Z,Atlanta Hawks,Toronto Raptors,BetRivers,2025-10-08T21:31:19Z,h2h,Atlanta Hawks,1.41,1.41,0.709220,0.169023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,basketball_nba,2025-10-23T01:10:00Z,Utah Jazz,Los Angeles Clippers,DraftKings,2025-10-08T21:31:29Z,h2h,Utah Jazz,3.70,3.70,0.270270,0.051630
220,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,basketball_nba,2025-10-23T01:10:00Z,Utah Jazz,Los Angeles Clippers,MyBookie.ag,2025-10-08T21:31:54Z,h2h,Utah Jazz,3.70,3.70,0.270270,0.051630
221,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,basketball_nba,2025-10-23T01:10:00Z,Utah Jazz,Los Angeles Clippers,BetRivers,2025-10-08T21:31:19Z,h2h,Utah Jazz,3.60,3.60,0.277778,0.053064
222,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,basketball_nba,2025-10-23T01:10:00Z,Utah Jazz,Los Angeles Clippers,FanDuel,2025-10-08T21:31:00Z,h2h,Utah Jazz,4.10,4.10,0.243902,0.046593


In [80]:
# detect_arbitrage returns a margin % for the first detected arb (per implementation)
arb_margin = analysis.detect_arbitrage(best)
if arb_margin is None:
    print("No arbitrage detected across games in the dataset.")
else:
    print(f"Arbitrage detected! Approx margin: {arb_margin}%")
    # Show which games have arbitrage using best_per_outcome approach
    best_per_outcome = df_for_analysis.loc[df_for_analysis.groupby(["game_id","outcome"])["price"].idxmax()].copy()
    best_per_outcome["implied_prob"] = 1.0 / best_per_outcome["price"].astype(float)
    summary = best_per_outcome.groupby("game_id").agg({'implied_prob':'sum'}).rename(columns={'implied_prob':'sum_best_implied'})
    summary["arb_margin"] = 1.0 - summary["sum_best_implied"]
    arbs = summary[summary["arb_margin"] > 0]
    display(best_per_outcome, arbs)


No arbitrage detected across games in the dataset.


In [81]:
# Builds a concise, shareable CSV that shows the best book for each outcome and whether the game is an arbitrage
best_per_outcome = df_for_analysis.loc[df_for_analysis.groupby(["game_id","outcome"])["price"].idxmax()].copy()
best_per_outcome["implied_prob"] = 1.0 / best_per_outcome["price"].astype(float)

summary = best_per_outcome.groupby("game_id").agg(
    sum_best_implied = ("implied_prob","sum"),
    outcomes = ("outcome", lambda s: ", ".join(s.unique()))
).reset_index()
summary["arb_margin"] = 1.0 - summary["sum_best_implied"]

# Combine for display
report = best_per_outcome.merge(summary, on="game_id", how="left").sort_values(["game_id","outcome"])
display(report[["game_id","outcome","bookmaker","price","implied_prob","sum_best_implied","arb_margin"]])

# Save CSV without opening notebook
report.to_csv(repo_root / "artifacts" / "demo_best_odds_report.csv", index=False)
print("Report saved to artifacts/demo_best_odds_report.csv")


Unnamed: 0,game_id,outcome,bookmaker,price,implied_prob,sum_best_implied,arb_margin
0,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,Atlanta Hawks,DraftKings,1.51,0.662252,1.041040,-0.041040
1,Atlanta Hawks_vs_Milwaukee Bucks_2026-01-19T18...,Milwaukee Bucks,DraftKings,2.64,0.378788,1.041040,-0.041040
2,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,Atlanta Hawks,FanDuel,1.47,0.680272,1.019255,-0.019255
3,Atlanta Hawks_vs_Toronto Raptors_2025-10-22T23...,Toronto Raptors,DraftKings,2.95,0.338983,1.019255,-0.019255
4,Boston Celtics_vs_Detroit Pistons_2025-11-26T2...,Boston Celtics,DraftKings,1.85,0.540541,1.045591,-0.045591
...,...,...,...,...,...,...,...
83,San Antonio Spurs_vs_Golden State Warriors_202...,San Antonio Spurs,DraftKings,1.91,0.523560,1.047120,-0.047120
84,San Antonio Spurs_vs_Houston Rockets_2025-11-0...,Houston Rockets,DraftKings,1.82,0.549451,1.044500,-0.044500
85,San Antonio Spurs_vs_Houston Rockets_2025-11-0...,San Antonio Spurs,DraftKings,2.02,0.495050,1.044500,-0.044500
86,Utah Jazz_vs_Los Angeles Clippers_2025-10-23T0...,Los Angeles Clippers,BetMGM,1.31,0.763359,1.007261,-0.007261


Report saved to artifacts/demo_best_odds_report.csv
