# In [48]:
import datetime
import json
import os
import re
import time
import warnings

import numpy as np
import pandas as pd
import requests
import s3fs
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")

# python-dotenv: read a .env file (if one is found) into the process environment.
load_dotenv()

# Key sent to The Odds API; os.getenv returns None when ODDS_API_KEY is unset.
API_KEY = os.getenv("ODDS_API_KEY")

# In [49]:
def slug(s):
    """Turn *s* into a lowercase identifier joined by single underscores.

    Surrounding whitespace is dropped, the text is lowercased, and every run
    of characters outside ``a-z0-9`` acts as a separator.  The result never
    starts or ends with an underscore; input with no alphanumerics gives ``""``.
    """
    normalized = s.strip().lower()
    # Splitting on separator runs leaves empty strings at the edges when the
    # text begins or ends with a separator; drop them before joining.
    pieces = re.split(r'[^a-z0-9]+', normalized)
    return '_'.join(piece for piece in pieces if piece)

# ---- Fetch current NCAAB odds from The Odds API ----
# Query values go through ``params`` so requests does the URL encoding and the
# key is not spliced into the URL string by hand.
api_url = "https://api.the-odds-api.com/v4/sports/basketball_ncaab/odds"
api_params = {
    "regions": "us,us2,eu,uk",
    "markets": "h2h,spreads,totals",
    "oddsFormat": "american",
    "apiKey": API_KEY,
}
# Without a timeout, requests waits indefinitely on a stalled connection.
resp = requests.get(api_url, params=api_params, timeout=30)
resp.raise_for_status()
games = resp.json()


def _median_or_nan(by_book):
    """Return the median of ``by_book``'s values as a float, or NaN if empty."""
    if not by_book:
        return np.nan
    return float(np.nanmedian(list(by_book.values())))


def _pick_book(by_book, chooser=max):
    """Return ``(book, value)`` for the entry selected by ``chooser``.

    ``chooser`` is ``max`` or ``min``; ties go to the first book in insertion
    order.  An empty mapping yields ``(None, NaN)`` so that every row carries
    the same set of summary columns.
    """
    if not by_book:
        return None, np.nan
    book = chooser(by_book, key=by_book.get)
    return book, by_book[book]


# One output row per game for each of the three markets.
moneyline_rows = []
spreads_rows = []
totals_rows = []

for g in games:
    home_team = g["home_team"]
    away_team = g["away_team"]
    game_info = {
        "game_id": g["id"],
        "home_team": home_team,
        "away_team": away_team,
        "commence_time": g["commence_time"],
    }
    ml_row = dict(game_info)
    sp_row = dict(game_info)
    tot_row = dict(game_info)

    # Per-book holders used to compute consensus/best values.
    home_ml = {}     # {book: moneyline price for the home team}
    away_ml = {}     # {book: moneyline price for the away team}
    home_sp = {}     # {book: spread point for the HOME side}
    tot_pts = {}     # {book: total points, taken from the Over outcome}
    over_odds = {}   # {book: price on the Over}
    under_odds = {}  # {book: price on the Under}

    for bm in g.get("bookmakers", []):
        book = slug(bm.get("title") or bm.get("key", ""))
        markets = {m["key"]: m for m in bm.get("markets", [])}

        # --- MONEYLINE ---
        for o in markets.get("h2h", {}).get("outcomes", []):
            price = o.get("price")
            if price is None:
                # A missing price would poison the median/max below.
                continue
            if o.get("name") == home_team:
                home_ml[book] = price
            elif o.get("name") == away_team:
                away_ml[book] = price

        # --- SPREADS (home side) ---
        outs = markets.get("spreads", {}).get("outcomes", [])
        o_home = next((x for x in outs if x.get("name") == home_team), None)
        if o_home and o_home.get("point") is not None:
            home_sp[book] = o_home["point"]

        # --- TOTALS ---
        outs = markets.get("totals", {}).get("outcomes", [])
        o_over = next((x for x in outs if x.get("name", "").lower() == "over"), None)
        o_under = next((x for x in outs if x.get("name", "").lower() == "under"), None)
        # The Over's point is used as "the" total line for the book.
        if o_over and o_over.get("point") is not None:
            tot_pts[book] = o_over["point"]
            if o_over.get("price") is not None:
                over_odds[book] = o_over["price"]
        if o_under and o_under.get("price") is not None:
            under_odds[book] = o_under["price"]

    # --- per-book columns holding only the value of the line ---
    # MONEYLINE: two columns per book so each side can be compared across books.
    for book in sorted(home_ml.keys() | away_ml.keys()):
        ml_row[f"{book}_home"] = home_ml.get(book, np.nan)
        ml_row[f"{book}_away"] = away_ml.get(book, np.nan)

    # SPREADS: one column per book with the HOME spread point.
    for book in sorted(home_sp):
        sp_row[book] = home_sp[book]

    # TOTALS: one column per book with the total points number.
    for book in sorted(tot_pts):
        tot_row[book] = tot_pts[book]

    # --- consensus (median across books) + best lines ---
    # MONEYLINE: best for a backer is the highest American odds number.
    ml_row["consensus_home_ml"] = _median_or_nan(home_ml)
    ml_row["best_home_ml_book"], ml_row["best_home_ml"] = _pick_book(home_ml)
    ml_row["consensus_away_ml"] = _median_or_nan(away_ml)
    ml_row["best_away_ml_book"], ml_row["best_away_ml"] = _pick_book(away_ml)

    # SPREADS (home side): the largest point is the most favourable number
    # whether the home team is the favourite or the underdog.
    sp_row["consensus_home_spread"] = _median_or_nan(home_sp)
    sp_row["best_home_spread_book"], sp_row["best_home_spread"] = _pick_book(home_sp)

    # TOTALS: consensus plus the extremes, which can signal off-market numbers.
    if not tot_pts:
        # No book posted a total line, so a stray Under price is not reported.
        under_odds = {}
    tot_row["consensus_total"] = _median_or_nan(tot_pts)
    tot_row["lowest_total_book"], tot_row["lowest_total"] = _pick_book(tot_pts, min)
    tot_row["highest_total_book"], tot_row["highest_total"] = _pick_book(tot_pts, max)
    # Best available price to bet each side of the total.
    tot_row["best_over_odds_book"], tot_row["best_over_odds"] = _pick_book(over_odds)
    tot_row["best_under_odds_book"], tot_row["best_under_odds"] = _pick_book(under_odds)

    moneyline_rows.append(ml_row)
    spreads_rows.append(sp_row)
    totals_rows.append(tot_row)

# ---- Build the three DataFrames ----
df_moneyline = pd.DataFrame(moneyline_rows)
df_spreads = pd.DataFrame(spreads_rows)
df_totals = pd.DataFrame(totals_rows)

# Order base columns first.  reindex (rather than plain column selection)
# still yields the base columns when the API returned no games at all.
base = ["game_id", "home_team", "away_team", "commence_time"]
df_moneyline = df_moneyline.reindex(
    columns=base + [c for c in df_moneyline.columns if c not in base]
)
df_spreads = df_spreads.reindex(
    columns=base + [c for c in df_spreads.columns if c not in base]
)
df_totals = df_totals.reindex(
    columns=base + [c for c in df_totals.columns if c not in base]
)

# ---- Write the CSVs (DataFrame index included, as before) ----
output_dir = "data/odds/11042025"
# to_csv does not create missing parent directories.
os.makedirs(output_dir, exist_ok=True)
df_moneyline.to_csv(os.path.join(output_dir, "moneyline.csv"))
df_spreads.to_csv(os.path.join(output_dir, "spreads.csv"))
df_totals.to_csv(os.path.join(output_dir, "totals.csv"))