In [1]:
import pandas as pd
import numpy as np
import os

# Slate date in ISO form; the odds folders use this string as-is.
DATE = "2025-11-06"
# Same date with the dashes stripped; the predictions folder uses this form.
CLEAN_DATE = "".join(DATE.split("-"))

# Per-game model predictions for the slate.
daily_preds = pd.read_csv(f"data/predictions/{CLEAN_DATE}/preds.csv")

# Spread and total lines; later cells read their
# `home`, `open`, `current` and `movement` columns.
spreads_df = pd.read_csv(f"data/odds/{DATE}/spreads_final.csv")
totals_df = pd.read_csv(f"data/odds/{DATE}/totals_final.csv")

# Team-name lookup with an `odds` column (joined against the odds files' `home`)
# and an `espn` column (joined against the predictions' `home`).
# NOTE(review): the name `map` shadows the Python built-in; it is kept because
# later cells reference it by this name.
map = pd.read_csv("data/teams/odds_map.csv")

In [2]:
def _safe_ratio(num, den):
    den = np.where(np.abs(den) < 1e-8, np.nan, den)
    return np.where(np.isnan(num) | np.isnan(den), np.nan, num / den)

def reconcile_game(row,
                   w_score=1.0,
                   w_sumhalves=1.0,
                   w_margin=1.0,
                   use_both_margins=False):
    """
    Reconcile one game's predictions via weighted least squares.

    Unknowns: x = [S_h, S_a]^T (reconciled home & away totals)
    Observations (linear), each used only when its inputs are non-null:
      - pred_home_score          ≈ [1, 0] @ x
      - pred_away_score          ≈ [0, 1] @ x
      - (pred_home_1h+2h)        ≈ [1, 0] @ x
      - (pred_away_1h+2h)        ≈ [0, 1] @ x
      - margin (home - away)     ≈ [1, -1] @ x

    Weights (inverse-variance proxies): w_score, w_sumhalves, w_margin.

    Margin handling
      The away margin is negated so every margin is expressed as home - away.
      With use_both_margins=False the home margin is used if present, else the
      negated away margin. With use_both_margins=True every available margin
      is used and w_margin is split evenly across them, so the total margin
      weight is w_margin whether one or two margins are present.

    Identifiability
      If the available observations do not determine a quantity (for example
      only a margin is known, or only home-side predictions), that quantity is
      returned as NaN rather than the arbitrary minimum-norm value the solver
      would otherwise produce.

    Halves
      Reconciled totals are split into halves using the predicted first-half
      share ph1 / (ph1 + ph2); halves are NaN when that share is unavailable.

    Parameters
    ----------
    row : object with a dict-style ``.get`` (e.g. a DataFrame row or a dict)
        May provide pred_{home,away}_{1h,2h,score,margin}; missing or null
        entries are skipped.
    w_score, w_sumhalves, w_margin : float
        Non-negative, finite weights for the three observation families.
    use_both_margins : bool
        See "Margin handling" above.

    Returns
    -------
    pd.Series
        Keys: cons_home_score, cons_away_score, cons_margin, cons_home_1h,
        cons_home_2h, cons_away_1h, cons_away_2h.

    Raises
    ------
    ValueError
        If the weight of any available observation is negative or not finite.
    """
    out_keys = (
        'cons_home_score', 'cons_away_score', 'cons_margin',
        'cons_home_1h', 'cons_home_2h', 'cons_away_1h', 'cons_away_2h',
    )

    # Pull columns, allowing for missing values (None / NaN)
    ph1, ph2 = row.get('pred_home_1h'), row.get('pred_home_2h')
    pa1, pa2 = row.get('pred_away_1h'), row.get('pred_away_2h')
    Sh, Sa = row.get('pred_home_score'), row.get('pred_away_score')
    Mh, Ma = row.get('pred_home_margin'), row.get('pred_away_margin')

    # A sum of halves only exists when both halves are present.
    home_sum_halves = (ph1 + ph2) if (pd.notna(ph1) and pd.notna(ph2)) else np.nan
    away_sum_halves = (pa1 + pa2) if (pd.notna(pa1) and pd.notna(pa2)) else np.nan

    # Each observation is (design row, observed value, weight).
    observations = []

    # Full score observations
    if pd.notna(Sh):
        observations.append(((1.0, 0.0), Sh, w_score))
    if pd.notna(Sa):
        observations.append(((0.0, 1.0), Sa, w_score))

    # Sum-of-halves observations
    if pd.notna(home_sum_halves):
        observations.append(((1.0, 0.0), home_sum_halves, w_sumhalves))
    if pd.notna(away_sum_halves):
        observations.append(((0.0, 1.0), away_sum_halves, w_sumhalves))

    # Margin observations, all expressed as home - away (home margin first).
    margins = []
    if pd.notna(Mh):
        margins.append(Mh)
    if pd.notna(Ma):
        margins.append(-Ma)
    if not use_both_margins:
        # Home margin if present, otherwise the negated away margin.
        margins = margins[:1]
    for margin_obs in margins:
        # Split the weight so the margins together always carry w_margin.
        observations.append(((1.0, -1.0), margin_obs, w_margin / len(margins)))

    # If we have no observations, everything is unknown.
    if not observations:
        return pd.Series(dict.fromkeys(out_keys, np.nan))

    A = np.array([obs[0] for obs in observations], dtype=float)   # shape (m, 2)
    y = np.array([obs[1] for obs in observations], dtype=float)   # shape (m,)
    w = np.array([obs[2] for obs in observations], dtype=float)   # shape (m,)

    if not np.all(np.isfinite(w) & (w >= 0)):
        raise ValueError("observation weights must be finite and non-negative")

    # Weighted least squares: solve argmin || W^{1/2}(Ax - y) ||^2
    sqrt_w = np.sqrt(w)
    Aw = A * sqrt_w[:, None]
    yw = y * sqrt_w

    x_hat, _, rank, _ = np.linalg.lstsq(Aw, yw, rcond=None)
    S_h, S_a = x_hat[0], x_hat[1]
    M = S_h - S_a

    if rank < 2:
        # Under-determined system: lstsq returned the minimum-norm solution,
        # which is only meaningful for linear combinations c @ x whose
        # coefficient vector c lies in the row space of Aw. `null_proj`
        # projects onto the orthogonal complement of that row space, so
        # c @ null_proj == 0 exactly when c @ x is determined by the data.
        null_proj = np.eye(2) - np.linalg.pinv(Aw) @ Aw

        def _undetermined(coeffs):
            return not np.allclose(np.asarray(coeffs, dtype=float) @ null_proj,
                                   0.0, atol=1e-9)

        if _undetermined((1.0, 0.0)):
            S_h = np.nan
        if _undetermined((0.0, 1.0)):
            S_a = np.nan
        if _undetermined((1.0, -1.0)):
            M = np.nan

    # Allocate halves using the predicted first-half share (if available).
    # If no halves exist, halves stay NaN but totals/margin stand.
    rh = float(_safe_ratio(ph1, home_sum_halves)) if pd.notna(home_sum_halves) else np.nan
    ra = float(_safe_ratio(pa1, away_sum_halves)) if pd.notna(away_sum_halves) else np.nan

    cons_home_1h = rh * S_h if pd.notna(rh) else np.nan
    cons_home_2h = (1 - rh) * S_h if pd.notna(rh) else np.nan
    cons_away_1h = ra * S_a if pd.notna(ra) else np.nan
    cons_away_2h = (1 - ra) * S_a if pd.notna(ra) else np.nan

    return pd.Series({
        'cons_home_score': S_h,
        'cons_away_score': S_a,
        'cons_margin':     M,
        'cons_home_1h':    cons_home_1h,
        'cons_home_2h':    cons_home_2h,
        'cons_away_1h':    cons_away_1h,
        'cons_away_2h':    cons_away_2h
    })

def reconcile_dataframe(df,
                        w_score=1.0,
                        w_sumhalves=1.0,
                        w_margin=1.0,
                        use_both_margins=False):
    """
    Apply reconcile_game to every row of a dataframe.

    Expected (all optional) input columns:
      pred_home_1h, pred_home_2h, pred_home_score, pred_home_margin
      pred_away_1h, pred_away_2h, pred_away_score, pred_away_margin
    Any that are missing are added as all-NaN columns in the returned frame
    so the per-row logic can skip them.

    Parameters
    ----------
    df : pd.DataFrame
        Input predictions. It is not modified.
    w_score, w_sumhalves, w_margin, use_both_margins
        Forwarded unchanged to reconcile_game.

    Returns
    -------
    pd.DataFrame
        A new frame with a fresh 0..n-1 index containing the input columns
        (plus any added all-NaN prediction columns) followed by the consensus
        columns produced by reconcile_game.
    """
    prediction_cols = [
        'pred_home_1h', 'pred_home_2h', 'pred_home_score', 'pred_home_margin',
        'pred_away_1h', 'pred_away_2h', 'pred_away_score', 'pred_away_margin'
    ]

    # reset_index returns a new frame, so the caller's df is left untouched,
    # and working on the re-indexed frame guarantees the consensus columns
    # line up row-for-row in the final concat (concat aligns on index).
    work = df.reset_index(drop=True)
    for col in prediction_cols:
        if col not in work.columns:
            work[col] = np.nan

    if work.empty:
        # apply() on an empty frame would not create the consensus columns;
        # take the column names from the all-NaN result of an empty game.
        template = reconcile_game(
            {},
            w_score=w_score,
            w_sumhalves=w_sumhalves,
            w_margin=w_margin,
            use_both_margins=use_both_margins
        )
        out = pd.DataFrame(columns=template.index, dtype=float)
    else:
        out = work.apply(
            lambda r: reconcile_game(
                r,
                w_score=w_score,
                w_sumhalves=w_sumhalves,
                w_margin=w_margin,
                use_both_margins=use_both_margins
            ),
            axis=1
        )
    return pd.concat([work, out], axis=1)

# ---- Reconcile the loaded predictions ----
if __name__ == "__main__":
    # Later cells rely on `result`, so this guard is expected to be true when
    # the notebook is executed. All three observation families are weighted
    # equally.
    result = reconcile_dataframe(
        daily_preds,
        w_score=1.0,
        w_sumhalves=1.0,
        w_margin=1.0,
    )

In [3]:
# Add the reconciled game total and keep only the columns carried forward.
# Per-half totals (home + away for each half) are not among the kept columns,
# so they are not computed here; add them to both the assign() call and the
# column list below if they are ever needed downstream.
result = result.assign(
    cons_total=result["cons_home_score"] + result["cons_away_score"]
)[[
    "date", "home", "away",
    "cons_home_score", "cons_away_score", "cons_margin", "cons_total",
    "cons_home_1h", "cons_home_2h", "cons_away_1h", "cons_away_2h",
]]

In [4]:
# Drop the "cons_" prefix from the reconciled columns; date/home/away keep their names.
result = result.rename(columns={
    "cons_home_score": "home_score",
    "cons_away_score": "away_score",
    "cons_margin": "margin",
    "cons_total": "total",
    "cons_home_1h": "home_1h",
    "cons_home_2h": "home_2h",
    "cons_away_1h": "away_1h",
    "cons_away_2h": "away_2h",
})

In [5]:
# Attach the `espn` team name to each totals line by matching the odds file's
# `home` against the lookup's `odds` column, then keep only the key and the lines.
# NOTE: this is an inner join, so lines without a lookup entry are dropped; and
# because `totals_df` is overwritten without its `home` column, this cell
# cannot be re-run without first re-running the loading cell.
totals_df = (
    totals_df
    .merge(map, left_on="home", right_on="odds")
    .loc[:, ["espn", "open", "current", "movement"]]
)

In [6]:
# Join the totals lines onto the predictions by home team (inner join: games
# with no matching totals line are dropped), discarding the `espn` join key.
result = (
    result
    .merge(totals_df, left_on="home", right_on="espn")
    .loc[:, [
        "date", "home", "away",
        "home_score", "away_score", "margin", "total",
        "home_1h", "home_2h", "away_1h", "away_2h",
        "open", "current", "movement",
    ]]
)

In [7]:
# Prefix the freshly merged line columns so they are identifiable as totals.
result = result.rename(columns={
    "open": "total_open",
    "current": "total_current",
    "movement": "total_movement",
})

In [8]:
# Attach the `espn` team name to each spread line by matching the odds file's
# `home` against the lookup's `odds` column, then keep only the key and the lines.
# NOTE: this is an inner join, so lines without a lookup entry are dropped; and
# because `spreads_df` is overwritten without its `home` column, this cell
# cannot be re-run without first re-running the loading cell.
spreads_df = (
    spreads_df
    .merge(map, left_on="home", right_on="odds")
    .loc[:, ["espn", "open", "current", "movement"]]
)


In [9]:
# Join the spread lines onto the predictions by home team (inner join: games
# with no matching spread line are dropped), discarding the `espn` join key.
result = (
    result
    .merge(spreads_df, left_on="home", right_on="espn")
    .loc[:, [
        "date", "home", "away",
        "home_score", "away_score", "margin", "total",
        "home_1h", "home_2h", "away_1h", "away_2h",
        "total_open", "total_current", "total_movement",
        "open", "current", "movement",
    ]]
)

In [10]:
# Prefix the freshly merged line columns so they are identifiable as spreads.
result = result.rename(columns={
    "open": "spread_open",
    "current": "spread_current",
    "movement": "spread_movement",
})

In [11]:
# Absolute gap between the model's numbers and the current market lines.
result['total_diff'] = (result['total'] - result['total_current']).abs()
# NOTE(review): `margin` is home minus away (see reconcile_game). If
# `spread_current` uses the common convention where a favored home team has a
# negative spread, this should compare margin against -spread_current instead;
# confirm the sign convention of the odds files.
result['margin_diff'] = (result['margin'] - result['spread_current']).abs()

In [12]:
# Primary output, stored alongside the day's raw predictions.
# (The default to_csv settings are kept, so the row index is written as the
# first, unnamed column.)
result.to_csv(f"data/predictions/{CLEAN_DATE}/daily_preds.csv")

# Convenience copy on the current user's Desktop. The location is resolved
# from the home directory instead of a machine-specific absolute path, and the
# copy is skipped (with a message) when no Desktop folder exists.
desktop_dir = os.path.join(os.path.expanduser("~"), "Desktop")
if os.path.isdir(desktop_dir):
    result.to_csv(os.path.join(desktop_dir, "daily_preds.csv"))
else:
    print(f"Skipping Desktop copy: {desktop_dir} does not exist")