In [61]:
import os, json, requests, pandas as pd, numpy as np
from datetime import datetime, timedelta, timezone
try:
    from zoneinfo import ZoneInfo
except ImportError:
    import pytz
    ZoneInfo = lambda tz: pytz.timezone(tz)

# =========================
# CONFIG
# =========================
# NOTE(security): prefer the ODDS_API_KEY environment variable (the same name
# the live-odds cell reads). The literal below is kept only so existing runs
# keep working; it is committed in plain text and should be rotated and removed.
API_KEY = os.environ.get("ODDS_API_KEY") or "39717636a99577438683f764bc1bc4ef"
SPORT_KEY = "basketball_ncaab"      # sport segment of the request path
REGIONS   = "us,eu"                 # comma-separated bookmaker regions
MARKETS   = "h2h,spreads,totals"    # moneyline, spread and total markets
ODDS_FMT  = "american"

# Bookmaker keys (lower-case) whose lines are kept by extract_rows.
BOOKS = {"draftkings","betmgm","pinnacle","caesars","williamhill_us","fanduel"}
TZ_LOCAL = "America/New_York"       # zone used to decide which games fall on LOCAL_DAY
LOCAL_DAY = "2025-11-06"            # YYYY-MM-DD, interpreted in TZ_LOCAL
OPENER_SCAN_STEP_MIN = 45           # minutes between snapshots in the opener scan
OPENER_START_LOCAL   = "06:00"      # HH:MM local time
MOVEMENT_SNAPSHOTS   = 20           # samples between each opener timestamp and day end

# =========================
# TIME HELPERS
# =========================
def local_day_bounds(local_day: str, tz_str: str):
    """Return the timezone-aware (start, end) datetimes of a local calendar day.

    Args:
        local_day: Date as "YYYY-MM-DD".
        tz_str: IANA timezone name, e.g. "America/New_York".

    Returns:
        (start, end) where start is 00:00:00 and end is 23:59:59.999 on that
        day, both carrying the requested timezone.

    Raises:
        ValueError: if local_day does not split into three integers or does
            not form a valid date.
    """
    tz = ZoneInfo(tz_str)
    y, m, d = map(int, local_day.split("-"))
    naive_start = datetime(y, m, d, 0, 0, 0)
    naive_end = datetime(y, m, d, 23, 59, 59, 999000)

    # pytz zones (the ImportError fallback for ZoneInfo) must be attached with
    # localize(); passing them as tzinfo= picks the zone's LMT offset instead
    # of the offset actually in force on that date.
    localize = getattr(tz, "localize", None)
    if localize is not None:
        return localize(naive_start), localize(naive_end)
    return naive_start.replace(tzinfo=tz), naive_end.replace(tzinfo=tz)

def to_utc_iso(dt: datetime) -> str:
    """Render *dt* as an ISO-8601 string in UTC, using a trailing "Z" for the offset."""
    as_utc = dt.astimezone(timezone.utc)
    stamp = as_utc.isoformat()
    return stamp.replace("+00:00", "Z")

def parse_hhmm(hhmm: str):
    """Split an "HH:MM" string into an (hour, minute) tuple of ints.

    Raises ValueError when the text is not exactly two colon-separated integers.
    """
    hour_text, minute_text = hhmm.split(":")
    return int(hour_text), int(minute_text)

# =========================
# API
# =========================
def fetch_snapshot_at(ts_iso: str):
    """Request the historical odds snapshot for one timestamp.

    Args:
        ts_iso: Timestamp sent as the ``date`` query parameter (the callers
            in this file pass UTC ISO-8601 strings ending in "Z").

    Returns:
        The decoded JSON body, or None when the API answers HTTP 422
        (treated here as "no snapshot for this timestamp").

    Raises:
        requests.HTTPError: for any other non-success status.
    """
    url = f"https://api.the-odds-api.com/v4/historical/sports/{SPORT_KEY}/odds"
    # Let requests build the query string so every value is URL-encoded,
    # instead of interpolating raw text into the URL.
    query = {
        "regions": REGIONS,
        "markets": MARKETS,
        "oddsFormat": ODDS_FMT,
        "apiKey": API_KEY,
        "date": ts_iso,
    }
    r = requests.get(url, params=query, timeout=30)
    if r.status_code == 422:
        return None
    r.raise_for_status()
    return r.json()

def fetch_nearest_at_or_before(ts_iso, max_back_minutes=30):
    """Return the first snapshot found at ts_iso or up to max_back_minutes earlier.

    Timestamps are tried one minute apart, newest first; each attempt is one
    call to fetch_snapshot_at. Returns None when every attempt comes back empty.
    """
    anchor = datetime.fromisoformat(ts_iso.replace("Z", "+00:00"))
    # Lazy generator: requests stop as soon as one attempt yields a snapshot.
    attempts = (
        fetch_snapshot_at(to_utc_iso(anchor - timedelta(minutes=back)))
        for back in range(max_back_minutes + 1)
    )
    return next((found for found in attempts if found is not None), None)

# =========================
# PARSING
# =========================
def bm_key(bm):
    """Return the bookmaker's "key" trimmed and lower-cased ("" when missing or empty)."""
    raw = bm.get("key")
    if not raw:
        return ""
    return raw.strip().lower()

def _outcome_value(outcome, field):
    """Return outcome[field], or NaN when the outcome or the field is missing/None."""
    value = outcome.get(field) if outcome else None
    return np.nan if value is None else value


def extract_rows(snap):
    """Flatten one odds snapshot into moneyline, spread and total rows.

    Only bookmakers whose normalised key is in BOOKS are kept. Every row
    carries the snapshot timestamp, the game identity fields and the book key.

    Args:
        snap: Decoded snapshot with a "timestamp" and an optional "data" list
            of games (each with "id", "home_team", "away_team",
            "commence_time" and an optional "bookmakers" list).

    Returns:
        (ml, sp, to): three lists of dicts with the value columns
        home_ml/away_ml, home_spread_point/away_spread_point and total_points.
        A value the book did not supply is NaN in all three markets.

    Raises:
        KeyError: if the snapshot or a game lacks one of the required fields.
    """
    ts = snap["timestamp"]
    ml, sp, to = [], [], []
    for g in snap.get("data", []):
        home_name, away_name = g["home_team"], g["away_team"]
        base = {"timestamp": ts, "game_id": g["id"], "home_team": home_name,
                "away_team": away_name, "commence_time": g["commence_time"]}
        for bm in g.get("bookmakers", []):
            key = bm_key(bm)
            if key not in BOOKS:
                continue
            mkts = {m["key"]: m for m in bm.get("markets", [])}

            # Moneyline: one price per side, matched on team name.
            if "h2h" in mkts:
                home = away = np.nan
                # .get: a market without an "outcomes" list yields a NaN row, not a KeyError.
                for o in mkts["h2h"].get("outcomes", []):
                    if o["name"] == home_name:
                        home = _outcome_value(o, "price")
                    elif o["name"] == away_name:
                        away = _outcome_value(o, "price")
                ml.append({**base, "book": key, "home_ml": home, "away_ml": away})

            # Spreads: the point quoted for each side.
            if "spreads" in mkts:
                outs = mkts["spreads"].get("outcomes", [])
                h = next((x for x in outs if x["name"] == home_name), None)
                a = next((x for x in outs if x["name"] == away_name), None)
                sp.append({**base, "book": key,
                           "home_spread_point": _outcome_value(h, "point"),
                           "away_spread_point": _outcome_value(a, "point")})

            # Totals: the line is read from the "Over" outcome.
            if "totals" in mkts:
                outs = mkts["totals"].get("outcomes", [])
                o = next((x for x in outs if x.get("name", "").lower() == "over"), None)
                to.append({**base, "book": key, "total_points": _outcome_value(o, "point")})
    return ml, sp, to

# =========================
# DF HELPERS
# =========================
def changes_only(df, cols):
    """Keep the rows where any of *cols* differs from the previous row of the same (game_id, book).

    Rows are ordered by game_id, book, timestamp first. A row qualifies when,
    for at least one column, its value is non-null and not equal to the
    preceding value in its group, so the first non-null observation of each
    group is always kept. An empty frame is returned unchanged.
    """
    if df.empty:
        return df
    ordered = df.sort_values(["game_id", "book", "timestamp"])
    per_line = ordered.groupby(["game_id", "book"])
    keep = pd.Series(False, index=ordered.index)
    for col in cols:
        previous = per_line[col].shift(1)
        moved = ordered[col].ne(previous) & ordered[col].notna()
        keep = keep | moved.fillna(False)
    return ordered.loc[keep].copy()

def summarize(df, cols, label):
    """Collapse a line history into one opening/current/movement row per game and book.

    Args:
        df: History rows with the key columns game_id, home_team, away_team,
            commence_time and book, plus "timestamp" and every column in cols.
        cols: Numeric value columns to summarise.
        label: Market tag embedded in the output column names.

    Returns:
        One row per key combination. For each column c in cols:
          opening_{label}_{c}   - first value by timestamp (groupby ``first``,
                                  which skips nulls)
          current_{label}_{c}   - last value by timestamp (groupby ``last``)
          movement_{label}_{c}  - current minus opening
          num_moves_{label}_{c} - count of non-null observations that differ
                                  from the previous observation; the first
                                  non-null observation of a group counts too.
        Empty input yields an empty frame that still has all of these columns,
        so callers can merge on the key columns without a KeyError.
    """
    keys = ["game_id", "home_team", "away_team", "commence_time", "book"]
    if df.empty:
        schema = list(keys)
        for c in cols:
            schema += [f"opening_{label}_{c}", f"current_{label}_{c}", f"movement_{label}_{c}"]
        schema += [f"num_moves_{label}_{c}" for c in cols]
        return pd.DataFrame(columns=schema)

    df = df.sort_values(["game_id", "book", "timestamp"])
    grouped = df.groupby(keys)
    first = grouped.first().reset_index()
    last = grouped.last().reset_index()

    # first and last come from the same groupby, so their rows line up by position.
    out = first[keys].copy()
    for c in cols:
        out[f"opening_{label}_{c}"] = first[c].values
        out[f"current_{label}_{c}"] = last[c].values
        out[f"movement_{label}_{c}"] = last[c].values - first[c].values

    mv = []
    for group_key, gdf in grouped:
        rec = dict(zip(keys, group_key))
        for c in cols:
            s = gdf[c].astype(float)
            rec[f"num_moves_{label}_{c}"] = int(((s != s.shift(1)) & s.notna()).sum())
        mv.append(rec)
    return out.merge(pd.DataFrame(mv), on=keys, how="left")

# =========================
# MAIN
# =========================
if __name__ == "__main__":
    # Pipeline:
    #   1. Opener pass   - scan snapshots from the day before LOCAL_DAY and record,
    #                      per (game, book, market), the first snapshot with a value.
    #   2. Movement pass - sample snapshots between each opener timestamp and the end
    #                      of LOCAL_DAY to build the line history.
    #   3. Summaries     - opening/current/movement per game and book, with the
    #                      opening timestamp attached, written to data/odds/<LOCAL_DAY>/.
    # Every timestamp fetched below is one HTTP request via fetch_snapshot_at.
    start_day, end_day = local_day_bounds(LOCAL_DAY, TZ_LOCAL)
    local_tz = ZoneInfo(TZ_LOCAL)

    # The scan starts at OPENER_START_LOCAL on the previous local day
    # (previously hard-coded to 06:00, which ignored the config constant).
    opener_hour, opener_minute = parse_hhmm(OPENER_START_LOCAL)
    opener_times = []
    cur = (start_day - timedelta(days=1)).replace(hour=opener_hour, minute=opener_minute)
    while cur <= end_day:
        opener_times.append(to_utc_iso(cur))
        cur += timedelta(minutes=OPENER_SCAN_STEP_MIN)
    day_start_iso, day_end_iso = to_utc_iso(start_day), to_utc_iso(end_day)

    # openers[market][(game_id, book)] = (snapshot timestamp, tuple of opening values)
    openers = {"ml": {}, "spread": {}, "total": {}}
    games_today = set()

    def record_first(rows, market, cols):
        """Store the first snapshot in which (game, book) shows any non-null value in cols."""
        for r in rows:
            if r["game_id"] not in games_today:
                continue
            key = (r["game_id"], r["book"])
            if key not in openers[market] and any(pd.notna(r.get(c)) for c in cols):
                openers[market][key] = (r["timestamp"], tuple(r.get(c) for c in cols))

    def _rows_for_today(rows):
        """Keep only rows belonging to games that start on LOCAL_DAY."""
        return [r for r in rows if r["game_id"] in games_today]

    # ---------- OPENER PASS ----------
    for ts in opener_times:
        snap = fetch_snapshot_at(ts)
        if not snap:
            continue
        ml_rows, sp_rows, tot_rows = extract_rows(snap)
        # A game belongs to LOCAL_DAY when its start time falls on that date in TZ_LOCAL.
        for rows in (ml_rows, sp_rows, tot_rows):
            for r in rows:
                commence_local = datetime.fromisoformat(
                    r["commence_time"].replace("Z", "+00:00")
                ).astimezone(local_tz)
                if commence_local.strftime("%Y-%m-%d") == LOCAL_DAY:
                    games_today.add(r["game_id"])
        record_first(ml_rows, "ml", ["home_ml", "away_ml"])
        record_first(sp_rows, "spread", ["home_spread_point", "away_spread_point"])
        record_first(tot_rows, "total", ["total_points"])

    # ---------- MOVEMENT PASS ----------
    # Many (game, book) pairs open in the same snapshot, so the sample grid is
    # derived once per distinct opener timestamp; the resulting set is the same.
    t1 = datetime.fromisoformat(day_end_iso.replace("Z", "+00:00"))
    opener_stamps = {opened_at for by_line in openers.values() for opened_at, _ in by_line.values()}
    sample_times = {day_start_iso, day_end_iso}
    for opened_at in opener_stamps:
        t0 = datetime.fromisoformat(opened_at.replace("Z", "+00:00"))
        step = (t1 - t0) / MOVEMENT_SNAPSHOTS
        for i in range(1, MOVEMENT_SNAPSHOTS + 1):
            sample_times.add(to_utc_iso(t0 + i * step))
    sample_times = sorted(sample_times)

    ml_hist, sp_hist, tot_hist = [], [], []

    def _collect(snapshot):
        """Append the LOCAL_DAY rows of one snapshot to the three history lists."""
        snap_ml, snap_sp, snap_tot = extract_rows(snapshot)
        ml_hist.extend(_rows_for_today(snap_ml))
        sp_hist.extend(_rows_for_today(snap_sp))
        tot_hist.extend(_rows_for_today(snap_tot))

    for ts in sample_times:
        snap = fetch_snapshot_at(ts)
        if snap:
            _collect(snap)

    # Make sure the history ends with the latest snapshot at or just before day end.
    final_snap = fetch_nearest_at_or_before(day_end_iso)
    if final_snap:
        _collect(final_snap)

    # ---------- DATAFRAMES ----------
    df_ml_hist      = pd.DataFrame(ml_hist)
    df_spreads_hist = pd.DataFrame(sp_hist)
    df_totals_hist  = pd.DataFrame(tot_hist)

    df_ml_summary      = summarize(df_ml_hist, ["home_ml", "away_ml"], "ml")
    df_spreads_summary = summarize(df_spreads_hist, ["home_spread_point", "away_spread_point"], "spread")
    df_totals_summary  = summarize(df_totals_hist, ["total_points"], "total")

    def _opener_frame(market):
        """One row per (game_id, book) with the timestamp of its opening snapshot."""
        records = [
            {"game_id": game_id, "book": book, "opening_timestamp": opened_at}
            for (game_id, book), (opened_at, _) in openers[market].items()
        ]
        # Explicit columns keep the frame usable even when no opener was recorded.
        return pd.DataFrame(records, columns=["game_id", "book", "opening_timestamp"])

    def _attach_opening_timestamp(summary, opener_df):
        """Inner-join the opening timestamp onto a summary and convert it to TZ_LOCAL."""
        if summary.empty:
            # Nothing to join; still expose the column so the CSV layout is stable.
            result = summary.copy()
            result["opening_timestamp"] = pd.Series(dtype="object")
            return result
        merged = summary.merge(
            opener_df[["game_id", "opening_timestamp", "book"]], on=["game_id", "book"]
        )
        merged["opening_timestamp"] = (
            pd.to_datetime(merged["opening_timestamp"], utc=True).dt.tz_convert(TZ_LOCAL)
        )
        return merged

    df_openers_ml     = _opener_frame("ml")
    df_openers_total  = _opener_frame("total")
    df_openers_spread = _opener_frame("spread")

    df_ml_summary      = _attach_opening_timestamp(df_ml_summary, df_openers_ml)
    df_totals_summary  = _attach_opening_timestamp(df_totals_summary, df_openers_total)
    df_spreads_summary = _attach_opening_timestamp(df_spreads_summary, df_openers_spread)

    os.makedirs(f"data/odds/{LOCAL_DAY}/", exist_ok=True)
    df_totals_summary.to_csv(f"data/odds/{LOCAL_DAY}/totals.csv")
    df_spreads_summary.to_csv(f"data/odds/{LOCAL_DAY}/spreads.csv")
    df_ml_summary.to_csv(f"data/odds/{LOCAL_DAY}/moneyline.csv")

## 22740 - 24450

In [96]:
from dotenv import load_dotenv
import os, re

import warnings
warnings.filterwarnings("ignore")

# Load variables from a local .env file (if any) into the environment.
load_dotenv()

API_KEY = os.getenv("ODDS_API_KEY")
if not API_KEY:
    # Fail here with a clear message rather than sending apiKey=None to the API.
    raise RuntimeError("ODDS_API_KEY is not set; add it to the environment or to .env")

LOCAL_DAY = "2025-11-06"

# Per-book opening/current summaries written by the opener/movement cell.
df_totals_summary = pd.read_csv(f"data/odds/{LOCAL_DAY}/totals.csv")
df_spreads_summary = pd.read_csv(f"data/odds/{LOCAL_DAY}/spreads.csv")
df_ml_summary = pd.read_csv(f"data/odds/{LOCAL_DAY}/moneyline.csv")

def slug(s):
    """Turn free text into a lower-case identifier made of [a-z0-9] runs joined by "_".

    Surrounding whitespace is ignored and every run of other characters
    collapses to a single underscore, with none at either end.
    """
    words = re.findall(r'[a-z0-9]+', s.strip().lower())
    return '_'.join(words)

# Current (live) odds for every listed game, across the requested regions.
api_url = (
    f"https://api.the-odds-api.com/v4/sports/basketball_ncaab/odds"
    f"?regions=us,us2,eu,uk&markets=h2h,spreads,totals&oddsFormat=american&apiKey={API_KEY}"
)
# Bound the wait so a stalled connection cannot hang the cell
# (same 30 s limit as the historical fetch).
resp = requests.get(api_url, timeout=30)
resp.raise_for_status()
games = resp.json()

moneyline_rows, spreads_rows, totals_rows = [], [], []


def _median_or_nan(quotes):
    """Median of the values quoted across books; NaN when no book quoted."""
    return float(np.nanmedian(list(quotes.values()))) if quotes else np.nan


def _pick(quotes, chooser):
    """Return (book, value) for the entry chosen by min/max; (None, NaN) when empty."""
    if not quotes:
        return None, np.nan
    chosen = chooser(quotes, key=quotes.get)
    return chosen, quotes[chosen]


def _find_outcome(outcomes, matches):
    """First outcome satisfying the predicate, or None."""
    return next((o for o in outcomes if matches(o)), None)


# One wide row per game and market: a column per book, then consensus/best columns.
for g in games:
    base = {
        "game_id": g["id"],
        "home_team": g["home_team"],
        "away_team": g["away_team"],
        "commence_time": g["commence_time"],
    }
    home_name, away_name = base["home_team"], base["away_team"]
    ml_row, sp_row, tot_row = dict(base), dict(base), dict(base)

    # Per-book quotes collected for this game.
    home_ml, away_ml = {}, {}        # {book: moneyline price}
    home_sp = {}                     # {book: spread point for the HOME side}
    tot_pts = {}                     # {book: total points, read from the Over}
    over_odds, under_odds = {}, {}   # {book: price on the Over / Under}

    for bm in g.get("bookmakers", []):
        book = slug(bm.get("title") or bm.get("key", ""))
        markets = {m["key"]: m for m in bm.get("markets", [])}

        # --- MONEYLINE ---
        if "h2h" in markets:
            for quote in markets["h2h"].get("outcomes", []):
                if quote["name"] == home_name:
                    home_ml[book] = quote.get("price")
                elif quote["name"] == away_name:
                    away_ml[book] = quote.get("price")

        # --- SPREADS (home side) ---
        if "spreads" in markets:
            spread_outs = markets["spreads"].get("outcomes", [])
            home_quote = _find_outcome(spread_outs, lambda x: x.get("name") == home_name)
            if home_quote and home_quote.get("point") is not None:
                home_sp[book] = home_quote["point"]

        # --- TOTALS ---
        if "totals" in markets:
            total_outs = markets["totals"].get("outcomes", [])
            over_quote = _find_outcome(total_outs, lambda x: x.get("name", "").lower() == "over")
            under_quote = _find_outcome(total_outs, lambda x: x.get("name", "").lower() == "under")
            # The Over's point stands in for the book's total line.
            if over_quote and over_quote.get("point") is not None:
                tot_pts[book] = over_quote["point"]
                over_odds[book] = over_quote.get("price")
            if under_quote:
                under_odds[book] = under_quote.get("price")

    # --- per-book columns ---
    # Moneyline exposes two columns per book (home and away price).
    for book in sorted(set(home_ml) | set(away_ml)):
        ml_row[f"{book}_home"] = home_ml.get(book, np.nan)
        ml_row[f"{book}_away"] = away_ml.get(book, np.nan)
    # Spreads and totals expose one column per book holding the line itself.
    sp_row.update((book, home_sp[book]) for book in sorted(home_sp))
    tot_row.update((book, tot_pts[book]) for book in sorted(tot_pts))

    # --- consensus + best lines ---
    # Moneyline: median across books; "best" is the highest American price.
    for side, quotes in (("home", home_ml), ("away", away_ml)):
        ml_row[f"consensus_{side}_ml"] = _median_or_nan(quotes)
        ml_row[f"best_{side}_ml_book"], ml_row[f"best_{side}_ml"] = _pick(quotes, max)

    # Spread (home side): "best" is the largest point.
    sp_row["consensus_home_spread"] = _median_or_nan(home_sp)
    sp_row["best_home_spread_book"], sp_row["best_home_spread"] = _pick(home_sp, max)

    # Totals: consensus plus the lowest/highest line, which can flag off-market numbers.
    tot_row["consensus_total"] = _median_or_nan(tot_pts)
    tot_row["lowest_total_book"], tot_row["lowest_total"] = _pick(tot_pts, min)
    tot_row["highest_total_book"], tot_row["highest_total"] = _pick(tot_pts, max)
    # Best Over/Under prices are only reported when at least one total line exists.
    tot_row["best_over_odds_book"], tot_row["best_over_odds"] = _pick(over_odds if tot_pts else {}, max)
    tot_row["best_under_odds_book"], tot_row["best_under_odds"] = _pick(under_odds if tot_pts else {}, max)

    moneyline_rows.append(ml_row)
    spreads_rows.append(sp_row)
    totals_rows.append(tot_row)

# ---- Build the three DataFrames ----
df_moneyline = pd.DataFrame(moneyline_rows)
df_spreads = pd.DataFrame(spreads_rows)
df_totals = pd.DataFrame(totals_rows)

# Identity columns come first; commence_time is not carried into the final views.
base = ["game_id", "home_team", "away_team", "commence_time"]
id_cols = base[:3]
books = ['betmgm', 'fanduel', 'pinnacle', 'draftkings']

# reindex (rather than [] selection) keeps the column layout fixed: a book with
# no quote for any game, or an empty `games` list, yields NaN/empty columns
# instead of a KeyError.
df_spreads = df_spreads.reindex(columns=id_cols + books)
df_totals = df_totals.reindex(columns=id_cols + books)
df_moneyline = df_moneyline.reindex(
    columns=id_cols + [f"{b}_home" for b in books] + [f"{b}_away" for b in books]
)

# Opening lines come from the FanDuel rows of the stored summaries; the current
# lines come from the FanDuel column of the live tables built above.
is_fanduel_spread = df_spreads_summary['book'] == "fanduel"
df_sharp_spreads = df_spreads_summary.loc[
    is_fanduel_spread,
    ["game_id", "home_team", "away_team", 'opening_spread_home_spread_point', 'opening_spread_away_spread_point'],
].copy()  # copy: a column is added below, which must not write into a slice
# 'spread' is the opening line of whichever side opened as favourite (negative point).
home_favored_at_open = df_sharp_spreads['opening_spread_home_spread_point'] < 0
df_sharp_spreads['spread'] = np.where(
    home_favored_at_open,
    df_sharp_spreads['opening_spread_home_spread_point'],
    df_sharp_spreads['opening_spread_away_spread_point'],
)
df_sharp_totals = df_totals_summary.loc[
    df_totals_summary['book'] == "fanduel",
    ["game_id", "home_team", "away_team", 'opening_total_total_points'],
].copy()

totals_df = df_sharp_totals.merge(df_totals[["game_id", "fanduel"]], on="game_id")
spreads_df = df_sharp_spreads.merge(df_spreads[["game_id", "fanduel"]], on="game_id")

# The live column is always the HOME spread, while 'spread' follows the opening
# favourite. Put the current line on the same side before comparing: when the
# away team opened as favourite, use the negated home point (assumes the two
# sides of a spread mirror each other). Without this, games where the home team
# opened as underdog compared lines of opposite sign.
same_side_as_home = spreads_df['opening_spread_home_spread_point'] < 0
spreads_df["fanduel"] = np.where(same_side_as_home, spreads_df["fanduel"], 0 - spreads_df["fanduel"])

spreads_df = spreads_df[["home_team", "away_team", "spread", "fanduel"]].copy()
totals_df = totals_df[["home_team", "away_team", "opening_total_total_points", "fanduel"]].copy()

totals_df.columns = ['home', 'away', 'open', 'current']
totals_df['movement'] = totals_df['open'] - totals_df['current']
totals_df.to_csv(f"data/odds/{LOCAL_DAY}/totals_final.csv")

spreads_df.columns = ['home', 'away', 'open', 'current']
# Expressed on the favourite's margin: (points laid at open) - (points laid now).
spreads_df['movement'] = (-1 * spreads_df['open']) - (-1 * (spreads_df['current']))
spreads_df.to_csv(f"data/odds/{LOCAL_DAY}/spreads_final.csv")