In [None]:
from worster_underwood_cfb.data.cfbd_api import get_college_football_games

# Season whose games are loaded below.
SEASON = 2025

# force_refresh=True asks the loader to skip its cache and fetch again on
# every run of this cell.
# Alternatives (per the loader's keyword arguments — confirm in cfbd_api):
#   get_college_football_games(SEASON)                   # default cache TTL (24h)
#   get_college_football_games(SEASON, max_age_hours=6)  # tighter TTL in-season
df, ly_df = get_college_football_games(SEASON, force_refresh=True)

In [None]:
import pandas as pd
import numpy as np

def _neutral_flags(values: pd.Series) -> np.ndarray:
    """
    Convert a neutral-site column into a boolean NumPy array.

    Strings are matched case-insensitively after stripping whitespace
    ('true'/'false', 't'/'f', '1'/'0', 'yes'/'no', 'y'/'n'); any other string
    raises ValueError instead of silently counting as True. Non-string values
    (bools, 0/1, numpy scalars) go through plain ``bool()``.
    """
    tokens = {
        'true': True, 't': True, '1': True, 'yes': True, 'y': True,
        'false': False, 'f': False, '0': False, 'no': False, 'n': False,
    }

    def _as_bool(value) -> bool:
        if isinstance(value, str):
            key = value.strip().lower()
            if key not in tokens:
                raise ValueError(f"Unrecognized neutralSite value: {value!r}")
            return tokens[key]
        return bool(value)

    return np.fromiter((_as_bool(v) for v in values), dtype=bool, count=len(values))


def prepare_schedule(
    df: pd.DataFrame,
    hfa: int = 3,
    decay: float = 1/3,  # kept for compat; unused here
) -> pd.DataFrame:
    """
    Return ['week','winner','loser','hfa_margin'] ready for add_weight().

    Works with snake_case (CFBD) or camelCase column names. The input frame is
    not modified.

    Parameters
    ----------
    df : one row per game with season type, week, neutral-site flag,
        home/away team and home/away points.
    hfa : home-field advantage in points, subtracted from the home side's
        margin unless the game is at a neutral site.
    decay : accepted for backward compatibility; not used by this function.

    Returns
    -------
    DataFrame with one row per completed game:
      * week       - int16; every postseason game is assigned week 18
      * winner     - team with more points
      * loser      - the other team
      * hfa_margin - winner's margin after removing home-field advantage
                     (can be negative for a narrow home win)

    Notes
    -----
    A level score (margin == 0) is not a home win, so the away team is listed
    as the winner for such rows.

    Raises
    ------
    KeyError        if a required column is missing.
    ValueError      if neutralSite holds an unrecognized string.
    AssertionError  if a fail-fast invariant is violated.
    """
    out_cols = ['week', 'winner', 'loser', 'hfa_margin']
    if df.empty:
        return pd.DataFrame(columns=out_cols)

    # Normalize snake_case -> camelCase so the rest of the logic is column-stable.
    # Skip a rename when the camelCase name already exists, to avoid duplicates.
    rename_map = {
        'season_type': 'seasonType',
        'neutral_site': 'neutralSite',
        'home_team': 'homeTeam',
        'away_team': 'awayTeam',
        'home_points': 'homePoints',
        'away_points': 'awayPoints',
    }
    df = df.rename(columns={
        old: new for old, new in rename_map.items()
        if old in df.columns and new not in df.columns
    })

    required = ['seasonType','week','neutralSite','homeTeam','awayTeam','homePoints','awayPoints']
    missing = [c for c in required if c not in df.columns]
    if missing:
        raise KeyError(f"prepare_schedule missing required columns: {missing}")

    # 1) Drop canceled / incomplete games (no score on either side)
    df = df.dropna(subset=['homePoints','awayPoints']).reset_index(drop=True)
    if df.empty:
        return pd.DataFrame(columns=out_cols)

    # 2) Fail-fast invariants
    assert not df[['seasonType','week','homeTeam','awayTeam']].isna().any().any(), \
        "Nulls in required non-score fields."
    assert not df['neutralSite'].isna().any(), "neutralSite should be non-null after drop."

    # 3) Types + postseason mapping (robust to Enums / weird casings):
    #    'SeasonType.POSTSEASON' -> 'postseason'
    df['seasonType'] = df['seasonType'].astype(str).str.split('.').str[-1].str.lower()
    df['week'] = pd.to_numeric(df['week'], errors='raise', downcast='integer')
    postseason_week = 18  # all postseason games share one week slot
    df.loc[df['seasonType'].eq('postseason'), 'week'] = postseason_week
    df['week'] = df['week'].astype('int16')
    assert (df['week'] >= 1).all(), "week must be >= 1"

    # Pull arrays once
    ns = _neutral_flags(df['neutralSite'])
    hp = pd.to_numeric(df['homePoints'], errors='raise').to_numpy()
    ap = pd.to_numeric(df['awayPoints'], errors='raise').to_numpy()
    wk = df['week'].to_numpy()
    home = df['homeTeam'].to_numpy(object)
    away = df['awayTeam'].to_numpy(object)

    # 4) Margins & outcomes
    margin = hp - ap                          # home-perspective true margin
    home_field = np.where(ns, 0, hfa)         # 0 if neutral, else HFA
    adj_home = margin - home_field            # remove HFA from home side

    home_win = margin > 0
    winners = np.where(home_win, home, away)
    losers = np.where(home_win, away, home)
    # Express the adjusted margin from the winner's point of view.
    hfa_margin = np.where(home_win, adj_home, -adj_home)

    return pd.DataFrame({
        'week': wk,
        'winner': winners,
        'loser': losers,
        'hfa_margin': hfa_margin,
    })


def add_weight(df: pd.DataFrame, decay: float = 1/3) -> pd.DataFrame:
    """
    Attach a per-game 'weight' column built from team activity and recency.

    For each game:
        raw = sqrt((games_by_winner + games_by_loser) / max_of_that_sum
                   / weeks_ago ** decay)
    where weeks_ago = (latest week + 1) - week. Raw weights are then rescaled
    so they sum to 100.

    Expects columns ['week','winner','loser','hfa_margin'] and returns a new
    frame with those columns plus 'weight'; the input is left untouched.
    An empty input yields an empty frame with a float64 'weight' column.
    """
    base_cols = ['week', 'winner', 'loser', 'hfa_margin']

    # Empty schedule: nothing to weigh, just add the column.
    if df.empty:
        no_weights = pd.Series(dtype='float64')
        return df.assign(weight=no_weights)[base_cols + ['weight']]

    # --- sanity checks ---
    assert decay > 0, "decay must be > 0"
    assert not df[['winner','loser']].isna().any().any(), "winner/loser must be non-null"
    assert (df['week'] >= 1).all(), "week must be >= 1"

    n_games = len(df)
    weeks = df['week'].values

    # Encode every team appearance (winners first, then losers) as an integer
    # code so appearances can be counted with bincount.
    appearances = np.concatenate([df['winner'].values, df['loser'].values])
    team_codes, _ = pd.factorize(appearances, sort=False)
    games_per_team = np.bincount(team_codes)

    # Combined season game count of the two teams in each game.
    combined = games_per_team[team_codes[:n_games]] + games_per_team[team_codes[n_games:]]
    busiest = combined.max()

    # The most recent week is 1 week ago, earlier weeks are further back.
    age = (weeks.max() + 1) - weeks

    if busiest > 0:
        raw = np.sqrt((combined / busiest) / (age ** decay))
        weights = raw * (100.0 / raw.sum())  # rescale so the total is 100
    else:
        weights = np.zeros(n_games, dtype=np.float64)

    return df[base_cols].assign(weight=weights)


In [None]:
# Tunables for this run: home-field advantage (points) and recency decay exponent.
HFA_POINTS = 3
RECENCY_DECAY = 1/3

# Winner/loser schedules for both loaded frames.
sched_cur = prepare_schedule(df, hfa=HFA_POINTS)
sched_prev = prepare_schedule(ly_df, hfa=HFA_POINTS)

# Per-game weights for the current-season schedule only.
weighted_cur = add_weight(sched_cur, decay=RECENCY_DECAY)

In [None]:
# Display the current-season schedule together with its per-game weights.
weighted_cur

In [None]:
# Rows of the raw games frame where the score difference is exactly zero.
# NOTE(review): uses camelCase score columns — confirm df is not snake_case here.
score_gap = df['awayPoints'] - df['homePoints']
tie = df[score_gap.eq(0)]

In [None]:
# Inspect which columns are available on the level-score rows.
tie.columns