In [None]:
import pyarrow, pandas as pd, sys
pyarrow.__version__, pd.__version__, sys.executable

In [None]:
from worster_underwood_cfb.data.cfbd_api import get_college_football_games

# NOTE: this call passes force_refresh=True, so the on-disk cache is bypassed
# and both seasons are re-fetched (and the cache overwritten) on every run.
# Drop the flag to get the default 24h cache TTL.
df, ly_df = get_college_football_games(2025,force_refresh=True)

# Force a refresh now
#df, ly_df = get_college_football_games(2025, force_refresh=True)

# Tighter TTL during the season
#df, ly_df = get_college_football_games(2025, max_age_hours=6)

In [None]:
df

In [None]:
# worster_underwood_cfb/data/cfbd_api.py
from __future__ import annotations

from typing import Tuple
import os
import time
import pathlib
import pandas as pd
from dotenv import load_dotenv

from cfbd.configuration import Configuration
from cfbd.api_client import ApiClient
from cfbd import GamesApi  # type: ignore
from cfbd.models.division_classification import DivisionClassification
from cfbd.rest import ApiException

__all__ = [
    "get_college_football_games",
    "clear_games_cache",
    "_make_games_api",  # exported for reuse/tests if you want it
]

# -------------------------------
# Cache configuration
# -------------------------------
_CACHE_VERSION = "v1"  # bump if you change schema/filters so old files don't mix
_DEFAULT_TTL_HOURS = 24
_DEFAULT_CACHE_DIR = os.getenv("WU_CFB_CACHE_DIR", ".cache/cfbd")


# -------------------------------
# Small cache helpers
# -------------------------------
def _cache_base(cache_dir: str, year: int) -> pathlib.Path:
    """
    Build the extension-less cache path for one season.

    The caller appends ``.parquet`` or ``.pkl`` to the returned path, e.g.
    ``.cache/cfbd/games_2025_v1.[parquet|pkl]``.

    Side effect: the cache directory (including parents) is created if it
    does not exist yet.
    """
    root = pathlib.Path(cache_dir)
    root.mkdir(parents=True, exist_ok=True)
    stem = f"games_{year}_{_CACHE_VERSION}"
    return root / stem


def _is_fresh(base: pathlib.Path, max_age_hours: int) -> bool:
    """
    Tell whether the cache file for `base` is still within its TTL.

    Candidates are checked in the order ``.parquet`` then ``.pkl``; the FIRST
    one that exists decides the answer (a ``.pkl`` file is only consulted when
    no ``.parquet`` file is present). Returns False when neither exists.
    """
    candidates = (base.with_suffix(suffix) for suffix in (".parquet", ".pkl"))
    found = next((path for path in candidates if path.exists()), None)
    if found is None:
        return False
    ttl_seconds = max_age_hours * 3600
    return time.time() - found.stat().st_mtime <= ttl_seconds


def _read_cache(base: pathlib.Path) -> pd.DataFrame | None:
    """Try Parquet, then Pickle; return DataFrame or None."""
    pq = base.with_suffix(".parquet")
    if pq.exists():
        try:
            return pd.read_parquet(pq)
        except Exception:
            pass
    pkl = base.with_suffix(".pkl")
    if pkl.exists():
        try:
            return pd.read_pickle(pkl)
        except Exception:
            pass
    return None


def _write_cache(base: pathlib.Path, df: pd.DataFrame) -> None:
    """
    Persist `df` next to `base`, preferring Parquet and falling back to Pickle.

    Exactly one cache file is left behind per base path. The freshness check
    looks at the first existing file in the order .parquet, .pkl, and the
    reader also prefers .parquet, so a leftover file of the *other* format
    would make them disagree:
      * stale .parquet + fresh .pkl fallback -> cache never considered fresh;
      * fresh .parquet + stale .pkl -> stale data served if the parquet file
        ever becomes unreadable.

    Parameters:
        base: extension-less cache path (suffix is added here).
        df: frame to store.
    """
    parquet_path = base.with_suffix(".parquet")
    pickle_path = base.with_suffix(".pkl")

    try:
        df.to_parquet(parquet_path, index=False)
    except Exception:
        # Deliberately broad: the parquet engine may be missing or the frame
        # may hold values the engine cannot serialise. Fall back to Pickle.
        df.to_pickle(pickle_path)
        superseded = parquet_path
    else:
        superseded = pickle_path

    # Best-effort removal of the other format's file (possibly stale or
    # partially written); failing to delete it must not fail the write.
    try:
        superseded.unlink(missing_ok=True)
    except OSError:
        pass


# -------------------------------
# CFBD client factory
# -------------------------------
def _make_games_api() -> GamesApi:
    """
    Create a GamesApi client authenticated with the CFBD_API_KEY token.

    The token is read from the environment after loading a .env file. Keeping
    host and auth setup in one place avoids repeating it at call sites.

    Raises:
        RuntimeError: if CFBD_API_KEY is unset or empty.
    """
    load_dotenv()
    token = os.getenv("CFBD_API_KEY")
    if not token:
        raise RuntimeError("CFBD_API_KEY is not set in the environment")

    cfg = Configuration(host="https://api.collegefootballdata.com")

    # Two auth styles are supported, depending on what the configuration
    # object exposes.
    if not hasattr(cfg, "access_token"):
        # api_key + api_key_prefix combine into "Authorization: Bearer <token>";
        # create the dicts first if this configuration lacks them.
        for attr in ("api_key", "api_key_prefix"):
            if not hasattr(cfg, attr):
                setattr(cfg, attr, {})
        cfg.api_key["Authorization"] = token
        cfg.api_key_prefix["Authorization"] = "Bearer"
    else:
        # Bearer token is set directly on the configuration.
        cfg.access_token = token

    client = ApiClient(cfg)
    return GamesApi(client)


# -------------------------------
# Internal fetcher (REST only)
# -------------------------------
def _fetch_year(api: GamesApi, year: int) -> pd.DataFrame:
    """
    Download one season's FBS and FCS games and return them as one frame.

    Both classifications are requested (FBS first, then FCS), concatenated,
    and de-duplicated on the game ``id`` column. An empty DataFrame (no
    columns) is returned when the API yields no games at all.
    """
    responses = [
        api.get_games(year=year, classification=DivisionClassification(division))
        for division in ("fbs", "fcs")
    ]
    games = [game for response in responses for game in response]
    if not games:
        return pd.DataFrame()

    frame = pd.DataFrame([game.to_dict() for game in games])
    deduped = frame.drop_duplicates(subset=["id"])
    return deduped.reset_index(drop=True)


# -------------------------------
# Public API
# -------------------------------
def get_college_football_games(
    year: int = 2024,
    *,
    cache_dir: str = _DEFAULT_CACHE_DIR,
    max_age_hours: int = _DEFAULT_TTL_HOURS,
    force_refresh: bool = False,
    api: GamesApi | None = None,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Return combined FBS + FCS games for `year` and `year - 1`, using an on-disk cache.

    Each season is cached separately; a season is fetched from the API only
    when its cache is missing, older than the TTL, unreadable, or when
    `force_refresh` is set. Freshly fetched data overwrites the cache.

    Parameters:
        year: target season (e.g., 2025).
        cache_dir: where to store cache files (default .cache/cfbd or $WU_CFB_CACHE_DIR).
        max_age_hours: TTL in hours; older cache files are refreshed (default 24).
        force_refresh: skip reading the cache for this call and overwrite it.
        api: optional pre-built GamesApi client (useful for tests). When omitted,
             a client is built here and closed before returning.

    Returns:
        (df, ly_df): frames for the requested season and the previous one,
        each de-duplicated by game id.

    Raises:
        RuntimeError: if the client cannot be built or the CFBD API reports an error.
    """
    cur_base = _cache_base(cache_dir, year)
    prev_base = _cache_base(cache_dir, year - 1)

    # Consult the cache unless the caller asked for a forced refresh.
    df_cur = None
    df_prev = None
    if not force_refresh:
        if _is_fresh(cur_base, max_age_hours):
            df_cur = _read_cache(cur_base)
        if _is_fresh(prev_base, max_age_hours):
            df_prev = _read_cache(prev_base)

    if df_cur is not None and df_prev is not None:
        return df_cur, df_prev

    # Only a client created here is closed here; a caller-supplied one is left open.
    owns_client = api is None
    if owns_client:
        api = _make_games_api()

    try:
        if df_cur is None:
            df_cur = _fetch_year(api, year)
            _write_cache(cur_base, df_cur)
        if df_prev is None:
            df_prev = _fetch_year(api, year - 1)
            _write_cache(prev_base, df_prev)
    except ApiException as exc:
        status = getattr(exc, 'status', '?')
        detail = getattr(exc, 'body', exc)
        raise RuntimeError(f"CFBD API error (status={status}): {detail}") from exc
    finally:
        if owns_client:
            # Release HTTP resources; failures while closing are ignored.
            try:
                api.api_client.close()  # type: ignore[attr-defined]
            except Exception:
                pass

    return df_cur, df_prev


def clear_games_cache(cache_dir: str = _DEFAULT_CACHE_DIR) -> None:
    """
    Delete every cached games file (``games_*.*``) found directly in `cache_dir`.

    Useful after bumping _CACHE_VERSION or while debugging. A missing
    directory is a no-op, and files that cannot be removed are skipped.
    """
    cache_root = pathlib.Path(cache_dir)
    if cache_root.exists():
        for cached_file in cache_root.glob("games_*.*"):
            try:
                cached_file.unlink()
            except Exception:
                # Best-effort cleanup: leave undeletable entries in place.
                continue


In [None]:
import cfbd

In [None]:
import time
import cfbd
from pprint import pprint
import os
from dotenv import load_dotenv
import pandas as pd
import numpy as np

from cfbd.models.division_classification import DivisionClassification
from cfbd.models.game import Game
from cfbd.models.season_type import SeasonType
from cfbd.rest import ApiException

# Load variables from a local .env file into the process environment so that
# CFBD_API_KEY can be read below.
load_dotenv()
# Defining the host is optional and defaults to https://api.collegefootballdata.com
# See configuration.py for a list of all supported configuration parameters.
configuration = cfbd.Configuration(
    host = "https://api.collegefootballdata.com"
)

# The client must configure the authentication and authorization parameters
# in accordance with the API server security policy.
# Examples for each auth method are provided below, use the example that
# satisfies your auth use case.

# Configure Bearer authorization: apiKey
# NOTE: this rebinds `configuration`, so the object built above (with the
# explicit host) is discarded; the host then falls back to its default.
# os.environ.get returns None when CFBD_API_KEY is unset.
configuration = cfbd.Configuration(
    access_token = os.environ.get("CFBD_API_KEY")
)



In [None]:
# Ad-hoc fetch of one season: pulls FBS and FCS games for `year` (plus last
# year's FBS games) and builds `df`, de-duplicated on game id.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.GamesApi(api_client)
    year = 2024 
    
    try:
        fbs = api_instance.get_games(year=year, classification=cfbd.DivisionClassification('fbs'))
        fcs = api_instance.get_games(year=year, classification=cfbd.DivisionClassification('fcs'))
        # NOTE(review): ly_fbs is fetched but not used anywhere in this cell.
        ly_fbs = api_instance.get_games(year=year-1, classification=cfbd.DivisionClassification('fbs'))
        
        all_d1 = fbs + fcs
        df = pd.DataFrame([g.to_dict() for g in all_d1]).drop_duplicates(subset=["id"])
        
    except Exception as e:
        # NOTE(review): every error is only printed; when a call fails, `df`
        # is not (re)assigned here, so later cells see a stale or missing `df`.
        print("Exception when calling GamesApi->get_games: %s\n" % e)

In [None]:
df

In [None]:
from typing import Iterable

def prepare_schedule(
    api_response: Iterable,  
    hfa: int = 3,
    decay: float = 1/3,  
) -> pd.DataFrame:
    """
    Convert raw game objects into a winner/loser schedule for add_weight().

    Each item of `api_response` must expose ``to_dict()``. Steps:
    - Games with a missing home or away score (canceled/incomplete) are dropped.
    - Remaining games must have non-null seasonType, week, teams and neutralSite.
    - Games whose seasonType equals 'postseason' are assigned week 18.
    - Ties are rejected.
    - The margin is expressed from the winner's perspective after removing
      `hfa` points from the home side (nothing is removed on neutral sites).

    Parameters:
        api_response: iterable of game objects.
        hfa: home-field advantage in points.
        decay: accepted for signature compatibility; not used in this function.

    Returns:
        DataFrame with columns ['week', 'winner', 'loser', 'hfa_margin'];
        empty (same columns) when no game has both scores.

    Raises:
        AssertionError: when one of the invariants above is violated.
    """
    out_cols = ['week', 'winner', 'loser', 'hfa_margin']
    wanted = ['seasonType', 'week', 'neutralSite',
              'homeTeam', 'awayTeam', 'homePoints', 'awayPoints']

    records = (game.to_dict() for game in api_response)
    games = pd.DataFrame.from_records(records, columns=wanted)

    # Canceled / incomplete games have no score on at least one side.
    games = games.dropna(subset=['homePoints', 'awayPoints']).reset_index(drop=True)
    if games.empty:
        return pd.DataFrame(columns=out_cols)

    # Fail-fast invariants on the completed games.
    required = games[['seasonType', 'week', 'homeTeam', 'awayTeam']]
    assert not required.isna().any().any(), \
        "Nulls in required non-score fields."
    assert not games['neutralSite'].isna().any(), \
        "neutralSite should be non-null after dropping canceled games."

    # Week as a small integer; all postseason games share week 18.
    games['week'] = pd.to_numeric(games['week'], errors='raise', downcast='integer')
    is_postseason = games['seasonType'].eq('postseason')
    games.loc[is_postseason, 'week'] = 18
    games['week'] = games['week'].astype('int16')
    assert (games['week'] >= 1).all(), "week must be >= 1"

    home_pts = pd.to_numeric(games['homePoints'], errors='raise').to_numpy()
    away_pts = pd.to_numeric(games['awayPoints'], errors='raise').to_numpy()
    neutral = games['neutralSite'].astype(bool).to_numpy()
    home_team = games['homeTeam'].to_numpy(object)
    away_team = games['awayTeam'].to_numpy(object)

    # Home-perspective margin, then strip the home-field advantage.
    margin = home_pts - away_pts
    assert not (margin == 0).any(), "Unexpected tie in completed FBS game."
    adj_home = margin - np.where(neutral, 0, hfa)

    # No ties remain, so "not a home win" means an away win.
    home_win = margin > 0

    return pd.DataFrame({
        'week': games['week'].to_numpy(),
        'winner': np.where(home_win, home_team, away_team),
        'loser': np.where(home_win, away_team, home_team),
        'hfa_margin': np.where(home_win, adj_home, -adj_home),
    })


In [None]:
def add_weight(df: pd.DataFrame, decay: float = 1/3) -> pd.DataFrame:
    """
    Attach a per-game weight based on how many games both teams played and recency.

    Weight formula: sqrt((total_games / max_total_games) / (weeks_ago ** decay))
    where total_games is the winner's game count plus the loser's game count
    and weeks_ago is (latest week + 1) - week. Weights are then rescaled so
    they sum to 100.

    Args:
        df: DataFrame with columns ['week', 'winner', 'loser', 'hfa_margin'].
        decay: Time decay exponent for recency weighting (default: 1/3).

    Returns:
        New DataFrame with columns ['week', 'winner', 'loser', 'hfa_margin',
        'weight']. An empty input yields an empty frame with those columns
        (this is what prepare_schedule returns when no game is complete).

    Raises:
        AssertionError: if decay <= 0, a team name is null, or a week is < 1.
    """
    out_cols = ['week', 'winner', 'loser', 'hfa_margin', 'weight']

    assert decay > 0, "decay must be > 0"

    # Empty schedule: nothing to weight. This must come before the data
    # checks; previously an `assert len(df) > 0` made this branch unreachable.
    if df.empty:
        return df.assign(weight=pd.Series(dtype='float64'))[out_cols]

    # --- fail-fast checks on the data ---
    assert not df[['winner', 'loser']].isna().any().any(), "winner/loser must be non-null"
    assert (df['week'] >= 1).all(), "week must be >= 1"

    winner_vals = df['winner'].to_numpy()
    loser_vals = df['loser'].to_numpy()
    week_vals = df['week'].to_numpy()

    # Encode team names as integer codes once, then count appearances.
    n = len(df)
    codes, _ = pd.factorize(np.concatenate([winner_vals, loser_vals]), sort=False)
    counts = np.bincount(codes)
    total_games = counts[codes[:n]] + counts[codes[n:]]

    # The most recent week is 1 week ago, so the divisor is never zero.
    weeks_ago = (week_vals.max() + 1) - week_vals

    # Every row contributes at least 2 to total_games, so the maximum is > 0.
    weights = np.sqrt((total_games / total_games.max()) / (weeks_ago ** decay))
    weights *= 100.0 / weights.sum()

    result = df[['week', 'winner', 'loser', 'hfa_margin']].copy()
    result['weight'] = weights
    return result

In [None]:
# NOTE(review): `api_response` is not assigned in any cell shown above;
# presumably it is the raw list of game objects (e.g. `fbs + fcs`) — confirm
# before running this cell.
df_raw = prepare_schedule(api_response)

# Display the schedule with its per-game weights.
add_weight(df_raw)

## DEV BELOW THIS CELL

In [None]:
from scipy.sparse import lil_matrix
from scipy.sparse.linalg import lsqr

def get_initial(schedule):
    """
    Fit first-pass team ratings by weighted least squares and attach them to each game.

    A sparse design matrix with +1 for the winner and -1 for the loser of each
    game is solved against `hfa_margin`, with rows scaled by sqrt(weight), so
    (winner rating - loser rating) approximates the adjusted margin.

    Parameters:
        schedule: DataFrame with at least the columns
            ['winner', 'loser', 'hfa_margin', 'weight'].

    Returns:
        A new DataFrame (fresh 0..n-1 index) holding the schedule's columns
        plus 'coefs_winner' and 'coefs_loser', the fitted ratings of each
        game's winner and loser.

    The input frame is left untouched; earlier versions re-indexed it in place.
    """
    teams = sorted(set(schedule['winner'].unique()).union(schedule['loser'].unique()))
    team_indices = {team: idx for idx, team in enumerate(teams)}

    # One row per game, one column per team: +1 winner, -1 loser.
    n_games = len(schedule)
    rows = np.arange(n_games)
    design = lil_matrix((n_games, len(teams)), dtype=int)
    design[rows, schedule['winner'].map(team_indices).to_numpy()] = 1
    design[rows, schedule['loser'].map(team_indices).to_numpy()] = -1

    # Weighted least squares: scale both sides by sqrt(weight).
    sqrt_w = np.sqrt(schedule['weight'].to_numpy())
    xw = design.multiply(sqrt_w[:, np.newaxis])
    yw = schedule['hfa_margin'].to_numpy() * sqrt_w

    # lsqr returns a tuple whose first element is the solution vector.
    coefs = lsqr(xw, yw)[0]
    rating_of = pd.Series(coefs, index=teams)

    # reset_index returns a new frame, so the caller's schedule is not modified.
    with_ratings = schedule.reset_index(drop=True)
    with_ratings['coefs_winner'] = with_ratings['winner'].map(rating_of)
    with_ratings['coefs_loser'] = with_ratings['loser'].map(rating_of)
    return with_ratings

In [None]:
get_initial(

In [None]:
df_raw = add_weight(prepare_schedule(api_response))

get_initial(df_raw)

In [None]:
def get_rating(subject, initial):
    """
    Re-estimate one team's rating from its games and its opponents' initial ratings.

    For every game involving `subject`, the target is
    (margin from the subject's perspective) + (opponent's initial rating);
    the returned rating is the weight-weighted least-squares fit of a single
    constant to those targets.

    Parameters:
        subject: team name to rate.
        initial: DataFrame with columns ['winner', 'loser', 'hfa_margin',
            'weight', 'coefs_winner', 'coefs_loser'] (the shape produced by
            get_initial).

    Returns:
        The fitted rating (float).

    Fix: the opponent's rating is now picked per game. Previously the columns
    were renamed positionally, so in games the subject LOST the subject's own
    rating ('coefs_loser') was added instead of the winner's.
    """
    is_winner = (initial['winner'] == subject).to_numpy()
    is_loser = (initial['loser'] == subject).to_numpy()
    played = is_winner | is_loser

    games = initial.loc[played]
    won = is_winner[played]

    margin = games['hfa_margin'].to_numpy()
    # Flip the sign for losses so the margin is always from the subject's side.
    subject_margin = np.where(won, margin, -margin)
    # Opponent is the loser when the subject won, the winner otherwise.
    opponent_rating = np.where(
        won,
        games['coefs_loser'].to_numpy(),
        games['coefs_winner'].to_numpy(),
    )

    y = subject_margin + opponent_rating
    sqrt_w = np.sqrt(games['weight'].to_numpy())

    # Single-column weighted least squares: design is a column of sqrt(weight).
    A = sqrt_w[:, np.newaxis]
    result, _, _, _ = np.linalg.lstsq(A, y * sqrt_w, rcond=0.1)
    return result[0]

def get_ratings(schedule):
    """
    Rate every team appearing in `schedule` and return them best-first.

    Initial ratings come from get_initial(schedule); each team is then
    re-rated individually with get_rating(). The result has columns
    ['teams', 'ratings'], sorted by rating in descending order.
    """
    initial = get_initial(schedule)
    all_teams = sorted(set(schedule['winner'].unique()) | set(schedule['loser'].unique()))
    team_ratings = [get_rating(team, initial) for team in all_teams]
    table = pd.DataFrame(list(zip(all_teams, team_ratings)), columns=['teams', 'ratings'])
    return table.sort_values("ratings", axis = 0, ascending = False)

def get_error(schedule, ratings):
    """
    Compute a per-team pseudo standard deviation of the rating residuals.

    For each game the squared error is
    (hfa_margin - (winner rating - loser rating)) ** 2. Each game counts for
    both of its teams. Per team:
        error    = sum(weight * squared error)
        games    = number of games with a computable error
        rmse     = sqrt(error / games)
        psudo_sd = (rmse * games + 6 * 22) / (games + 22)
    (the constants 6 and 22 presumably shrink toward a prior of 6 worth 22
    games — confirm with the model's author).

    Parameters:
        schedule: DataFrame with columns ['winner', 'loser', 'hfa_margin', 'weight'].
        ratings: DataFrame with columns ['teams', 'ratings'].

    Returns:
        DataFrame with columns ['team', 'psudo_sd'], one row per team, sorted
        by team name.

    Side effect (kept on purpose): `ratings` is sorted by rating (descending)
    and re-indexed by 'teams' IN PLACE; combined() relies on that index.

    Uses column-wise groupby aggregation instead of `groupby(..., axis=0)`
    with `apply`; the `axis` argument is deprecated in pandas.
    """
    ratings.sort_values(by=['ratings'], inplace=True, ascending=False)
    ratings.set_index('teams', inplace=True, drop = False)
    rating_of = ratings['ratings']

    predicted = (schedule['winner'].map(rating_of).to_numpy()
                 - schedule['loser'].map(rating_of).to_numpy())
    sq_err = (schedule['hfa_margin'].to_numpy() - predicted) ** 2

    games = pd.DataFrame({
        'winner': schedule['winner'].to_numpy(),
        'loser': schedule['loser'].to_numpy(),
        'weighted': schedule['weight'].to_numpy() * sq_err,
        'sq_err': sq_err,
    })

    # Every game appears twice: once under its winner and once under its loser.
    value_cols = ['team', 'weighted', 'sq_err']
    per_team = pd.concat(
        [
            games.rename(columns={'winner': 'team'})[value_cols],
            games.rename(columns={'loser': 'team'})[value_cols],
        ],
        ignore_index=True,
    )

    grouped = per_team.groupby('team')
    error_sum = grouped['weighted'].sum()
    game_count = grouped['sq_err'].count()

    rmse = (error_sum / game_count) ** 0.5
    psudo_sd = (rmse * game_count + 6 * 22) / (game_count + 22)

    return pd.DataFrame({
        'team': psudo_sd.index.to_numpy(),
        'psudo_sd': psudo_sd.to_numpy(),
    })

def combined(ratings, error):
    """
    Join team ratings with their pseudo standard deviations.

    Parameters:
        ratings: DataFrame with columns ['teams', 'ratings'].
        error: DataFrame with columns ['team', 'psudo_sd'].

    Returns:
        DataFrame with columns ['team', 'rating', 'psudo_sd'], one row per
        row of `ratings` and in the same order; teams missing from `error`
        get NaN for 'psudo_sd'.

    Both inputs are keyed by team name explicitly and neither is modified.
    Previously the join only lined up when `ratings` had already been
    re-indexed by team elsewhere, and `error` was re-indexed in place.
    """
    by_team = ratings.set_index('teams')[['ratings']]
    sd_by_team = error.set_index('team')[['psudo_sd']]
    rating_error = by_team.join(sd_by_team, how = 'left').reset_index()
    rating_error.columns = ['team','rating','psudo_sd']
    return rating_error

def error_hfa(x, year, week, soup=None):
    """
    Objective for tuning home-field advantage: total pseudo-SD for a given HFA.

    Builds the schedule with hfa=x and decay=0, rates the teams, and returns
    the sum of the 'psudo_sd' column from get_error(). `soup` is forwarded to
    get_schedule only when it is not None.

    NOTE(review): get_schedule is not defined in the visible part of this
    notebook — confirm where it comes from.
    """
    optional = {} if soup is None else {'soup': soup}
    schedule, _ = get_schedule(year, week=week, hfa = x, decay = 0, **optional)
    return get_error(schedule, get_ratings(schedule))['psudo_sd'].sum()

def error_decay(x, hfa, year, week, soup=None):
    """
    Objective for tuning the recency decay: total pseudo-SD for a given decay.

    Builds the schedule with decay=x and the supplied `hfa`, rates the teams,
    and returns the sum of the 'psudo_sd' column from get_error(). `soup` is
    forwarded to get_schedule only when it is not None.

    NOTE(review): get_schedule is not defined in the visible part of this
    notebook — confirm where it comes from.
    """
    optional = {} if soup is None else {'soup': soup}
    schedule, _ = get_schedule(year, week=week, hfa = hfa, decay = x, **optional)
    return get_error(schedule, get_ratings(schedule))['psudo_sd'].sum()


In [None]:
prepare_schedule(api_response)

In [None]:
# End-to-end run: weighted schedule -> ratings -> per-team error -> joined table.
schedule = add_weight(prepare_schedule(api_response))
ratings = get_ratings(schedule)
# get_error re-indexes `ratings` by team in place; combined() joins on that
# index, so keep this call before combined().
error = get_error(schedule, ratings)

combined(ratings, error)

### HANK'S CODE BELOW

In [None]:
# NOTE(review): get_worster is defined in the next cell, and neither
# `api_response` nor `ly_api_response` is assigned in the cells shown here.
get_worster(api_response, ly_api_response)
# TODO: filter FCS games out of both inputs before calling get_worster.

In [None]:
import numpy as np
import pandas as pd
from typing import Iterable, Any

# ---------- 1) Minimal schedule extraction (winner/loser only) ----------
def prepare_schedule(api_response: Iterable[Any]) -> pd.DataFrame:
    """
    Reduce raw game objects to a ['winner', 'loser'] frame of decided games.

    Each item must expose ``to_dict()``. Games missing either score
    (canceled/incomplete) are dropped; null team names or a tied score raise
    AssertionError. An empty frame with the same two columns is returned when
    no game has both scores.
    """
    needed = ['homeTeam','awayTeam','homePoints','awayPoints']
    records = (game.to_dict() for game in api_response)
    games = pd.DataFrame.from_records(records).reindex(columns=needed)
    games = games.dropna(subset=['homePoints','awayPoints']).reset_index(drop=True)
    if games.empty:
        return pd.DataFrame(columns=['winner','loser'])

    # Fail fast on malformed rows.
    assert not games[['homeTeam','awayTeam']].isna().any().any(), "Null team name(s)."
    home_pts = pd.to_numeric(games['homePoints'], errors='raise').to_numpy()
    away_pts = pd.to_numeric(games['awayPoints'], errors='raise').to_numpy()
    margin = home_pts - away_pts
    assert (margin != 0).all(), "Unexpected tie in completed game."

    home_team = games['homeTeam'].to_numpy()
    away_team = games['awayTeam'].to_numpy()
    home_win = margin > 0
    return pd.DataFrame({
        'winner': np.where(home_win, home_team, away_team),
        'loser': np.where(home_win, away_team, home_team),
    })


# ---------- 2) Fast résumé features for a season ----------
def create_team_metrics(schedule: pd.DataFrame, K: int = 15, prefix: str = "") -> pd.DataFrame:
    """
    Build one row of résumé features per team from a winner/loser schedule.

    Columns (in this order):
      team, {prefix}wins, {prefix}losses,
      {prefix}wins_from_1best, {prefix}wins_from_1worst, ... up to K
    where wins_from_{i}best is the win total of the i-th winningest opponent
    the team beat, and wins_from_{i}worst is the win total of the i-th
    least-winning opponent the team lost to. Opponent win totals come from the
    same schedule. Missing slots (fewer than K wins/losses) are 0.

    NOTE(review): an unbeaten team therefore has 0 in every "worst" slot, the
    same value as a loss to a winless team — confirm this is intended.

    Raises:
        AssertionError: if `schedule` lacks 'winner' or 'loser'.
    """
    assert {'winner','loser'}.issubset(schedule.columns), "schedule must have ['winner','loser']"

    # Interleaved ladder names: 1best, 1worst, 2best, 2worst, ...
    ladder_cols = [
        f'{prefix}wins_from_{rank}{kind}'
        for rank in range(1, K + 1)
        for kind in ('best', 'worst')
    ]

    n_games = len(schedule)
    if n_games == 0:
        return pd.DataFrame(columns=['team', f'{prefix}wins', f'{prefix}losses'] + ladder_cols)

    # Integer-encode team names once; codes follow sorted team order.
    stacked = pd.concat([schedule['winner'], schedule['loser']], ignore_index=True)
    codes, teams = pd.factorize(stacked, sort=True)
    n_teams = len(teams)
    win_codes, lose_codes = codes[:n_games], codes[n_games:]

    wins = np.bincount(win_codes, minlength=n_teams).astype(np.int16)
    losses = np.bincount(lose_codes, minlength=n_teams).astype(np.int16)

    # Sort games by winner (resp. loser) and cut at the cumulative counts to
    # get, per team, the opponents it beat (resp. lost to).
    beaten = np.split(
        lose_codes[np.argsort(win_codes, kind='mergesort')],
        np.cumsum(wins)[:-1],
    )
    lost_to = np.split(
        win_codes[np.argsort(lose_codes, kind='mergesort')],
        np.cumsum(losses)[:-1],
    )

    best = np.zeros((n_teams, K), dtype=np.int16)
    worst = np.zeros((n_teams, K), dtype=np.int16)
    for row, (victims, conquerors) in enumerate(zip(beaten, lost_to)):
        if victims.size:
            ranked = np.sort(wins[victims])[::-1]      # strongest beaten first
            best[row, :min(K, ranked.size)] = ranked[:K]
        if conquerors.size:
            ranked = np.sort(wins[conquerors])         # weakest conqueror first
            worst[row, :min(K, ranked.size)] = ranked[:K]

    data = {
        'team': teams.to_numpy(),
        f'{prefix}wins': wins,
        f'{prefix}losses': losses,
    }
    for slot in range(K):
        data[f'{prefix}wins_from_{slot + 1}best'] = best[:, slot]
        data[f'{prefix}wins_from_{slot + 1}worst'] = worst[:, slot]
    return pd.DataFrame(data)


# ---------- 3) Join current + last year and rank ----------
def get_worster(
    api_response: Iterable[Any],
    ly_api_response: Iterable[Any],
    K: int = 15,
) -> pd.DataFrame:
    """
    Rank teams by current-season résumé, using last season's résumé as tie-break.

    Current-season teams define the rows; last-season metrics are left-joined
    on team (one-to-one) and missing values become 0. Sorting is descending
    and stable on: wins, the interleaved current best/worst ladder, ly_wins,
    then the interleaved last-year ladder.

    Returns:
        DataFrame with columns team, wins, losses, current ladder, ly_wins,
        ly_losses, last-year ladder, with a fresh 0..n-1 index.
    """
    def ladder(prefix: str) -> list:
        # Interleaved names: 1best, 1worst, 2best, 2worst, ... up to K.
        return [
            f'{prefix}wins_from_{rank}{kind}'
            for rank in range(1, K + 1)
            for kind in ('best', 'worst')
        ]

    cur_sched = prepare_schedule(api_response)
    ly_sched = prepare_schedule(ly_api_response)

    cur_metrics = create_team_metrics(cur_sched, K=K, prefix="")
    ly_metrics = create_team_metrics(ly_sched, K=K, prefix="ly_")

    # Current season is the universe of teams; at most one LY row per team.
    joined = (
        cur_metrics.set_index('team')
        .join(ly_metrics.set_index('team'), how='left', validate='one_to_one')
        .reset_index()
    )

    # Teams absent last year get NaN from the join: zero-fill and restore ints.
    for col in ['ly_wins', 'ly_losses'] + ladder('ly_'):
        if col in joined.columns:
            joined[col] = joined[col].fillna(0).astype(np.int16)

    ordered_cols = (
        ['team', 'wins', 'losses'] + ladder('')
        + ['ly_wins', 'ly_losses'] + ladder('ly_')
    )
    joined = joined.reindex(columns=ordered_cols)

    # Every sort key is descending; mergesort keeps ties in their prior order.
    sort_cols = ['wins'] + ladder('') + ['ly_wins'] + ladder('ly_')
    descending = [False] * len(sort_cols)
    ranked = joined.sort_values(by=sort_cols, ascending=descending, kind='mergesort')
    return ranked.reset_index(drop=True)
