# NRL Odds Ingestion (Colab)
Fetch odds via The Odds API (if an API key is set) or parse local HTML snapshots, preview the resulting odds, and (optionally) open a PR against the data branch.

In [None]:
import os
import pathlib as P
import re
import subprocess

REPO = os.getenv("REPO", "")
if not REPO:
    try:
        remote = subprocess.check_output(
            ["git", "remote", "get-url", "origin"], text=True
        ).strip()
        m = re.search(r"github\.com[:/]+([^/]+)/([^/.]+)", remote)
        REPO = f"{m.group(1)}/{m.group(2)}" if m else ""
    except Exception:
        REPO = ""
if not REPO:
    REPO = "aturoa13699-lab/NRL-ENGINE"
if not P.Path("work").exists():
    !git clone -q https://github.com/{REPO}.git work
%cd work
!pip -q install pandas requests pyarrow fastparquet lxml

In [None]:
from tools.fetch_api_odds import main as fetch_api

# Run the project's API fetcher. A SystemExit raised by it is caught here so
# the remaining notebook cells can still run.
# NOTE(review): presumably main() exits when no API key is configured - confirm
# in tools/fetch_api_odds. Any SystemExit, whatever its exit code, is reported
# as "skipped".
try:
    fetch_api()
except SystemExit:
    print("API fetch skipped.")

In [None]:
from glob import glob
from tools.ingestors.oddspedia import parse as parse_od
import pandas as pd
import pathlib as P

# Columns that together identify one fixture; at most one row per key is kept.
KEY_COLS = ["date", "home_team", "away_team"]


def merge_odds(frames, base=None):
    """Combine odds frames into one frame with a single row per fixture.

    Args:
        frames: iterable of DataFrames holding newly parsed odds, ordered
            from lowest to highest priority.
        base: optional DataFrame of previously stored odds; it has lower
            priority than every frame in ``frames``. ``None`` or an empty
            frame is ignored.

    Returns:
        A DataFrame in which, for each ``KEY_COLS`` combination, only the
        last occurrence (base first, then ``frames`` in order) is kept.
        An empty frame with just the key columns when there is no input.
    """
    parts = list(frames)
    if base is not None and not base.empty:
        parts.insert(0, base)
    if not parts:
        # pd.concat raises on an empty list; return an empty result instead.
        return pd.DataFrame(columns=KEY_COLS)
    merged = pd.concat(parts, ignore_index=True)
    return merged.drop_duplicates(subset=KEY_COLS, keep="last")


rows = []
# glob() returns paths in arbitrary order; sorting makes the "last duplicate
# wins" rule reproducible (the lexicographically last snapshot path wins).
for f in sorted(glob("manual_feeds/*/oddspedia*_auto.html")):
    try:
        rows.append(parse_od(f))
    except Exception as e:
        # Best-effort per file: report the failure and keep going.
        print("Parse fail", f, e)
if rows:
    out = P.Path("data/sources")
    out.mkdir(parents=True, exist_ok=True)
    base = (
        pd.read_csv(out / "odds.csv") if (out / "odds.csv").exists() else pd.DataFrame()
    )
    # NOTE(review): keys read back from the CSV are strings; if parse_od yields
    # non-string dates they will not compare equal when deduplicating - confirm.
    df = merge_odds(rows, base)
    df.to_csv(out / "odds.csv", index=False)
print("odds.csv exists?", P.Path("data/sources/odds.csv").exists())

In [None]:
import pandas as pd
import pathlib as P

# Preview the ten most recent rows of the stored odds table, if it exists.
p = P.Path("data/sources/odds.csv")
if not p.exists():
    print("No odds.csv yet.")
else:
    latest = pd.read_csv(p).tail(10)
    display(latest)