In [None]:
import cfbd

In [None]:
import time
import cfbd
from pprint import pprint
import os
from dotenv import load_dotenv
import pandas as pd
import numpy as np

from cfbd.models.division_classification import DivisionClassification
from cfbd.models.game import Game
from cfbd.models.season_type import SeasonType
from cfbd.rest import ApiException
from cfbd.models.game import Game

# Load variables from a local .env file (if present) into the process environment
# so the API key never has to be hard-coded in the notebook.
load_dotenv()

# Build ONE client configuration. Previously `configuration` was assigned twice,
# and the second assignment silently discarded the explicit `host` of the first.
# - host: optional; defaults to https://api.collegefootballdata.com
#   (see cfbd's configuration.py for all supported parameters).
# - access_token: Bearer token read from the CFBD_API_KEY environment variable.
#   os.environ.get returns None when the variable is unset, in which case
#   requests are presumably sent unauthenticated -- TODO confirm against the API.
configuration = cfbd.Configuration(
    host="https://api.collegefootballdata.com",
    access_token=os.environ.get("CFBD_API_KEY"),
)



In [None]:
# Fetch every game for one season/classification into `api_response`,
# which the later cells consume.
with cfbd.ApiClient(configuration) as api_client:
    # Create an instance of the API class
    api_instance = cfbd.GamesApi(api_client)
    year = 2024
    division = cfbd.DivisionClassification('fbs')

    try:
        api_response = api_instance.get_games(year=year, classification=division)
    except Exception as e:
        print("Exception when calling GamesApi->get_games: %s\n" % e)
        # Re-raise instead of swallowing: if the call fails, `api_response` is
        # either undefined (fresh kernel) or stale (left over from an earlier
        # run), and downstream cells would fail confusingly or silently use
        # old data.
        raise

In [None]:
from typing import Iterable

def prepare_schedule(
    api_response: Iterable,
    hfa: int = 3,
    decay: float = 1/3,
) -> pd.DataFrame:
    """
    Convert raw game records into a winner/loser table with HFA-adjusted margins.

    Each element of ``api_response`` must expose ``to_dict()`` returning a mapping
    with the keys 'seasonType', 'week', 'neutralSite', 'homeTeam', 'awayTeam',
    'homePoints' and 'awayPoints'.

    Processing steps:
    - Games missing either score are discarded (canceled / not yet played).
    - The remaining games must have non-null seasonType, week, teams and
      neutralSite (checked with assertions).
    - Games whose seasonType equals 'postseason' are re-labelled as week 18;
      week is stored as int16 and must be >= 1.
    - Ties are rejected with an assertion.
    - The margin is reported from the winner's side after subtracting ``hfa``
      points from the home team (nothing is subtracted at a neutral site).

    Args:
        api_response: iterable of game objects as described above.
        hfa: home-field advantage, in points.
        decay: accepted but not used by this function.

    Returns:
        DataFrame with columns ['week', 'winner', 'loser', 'hfa_margin'];
        a columns-only empty frame when no game has both scores.
    """
    result_columns = ['week', 'winner', 'loser', 'hfa_margin']
    source_columns = [
        'seasonType', 'week', 'neutralSite',
        'homeTeam', 'awayTeam', 'homePoints', 'awayPoints',
    ]

    # Build the frame in a single pass over the records.
    records = (game.to_dict() for game in api_response)
    games = pd.DataFrame.from_records(records, columns=source_columns)

    # Keep only games where both scores are present.
    games = games.dropna(subset=['homePoints', 'awayPoints']).reset_index(drop=True)
    if games.empty:
        return pd.DataFrame(columns=result_columns)

    # Invariants that must hold for every completed game.
    required = games[['seasonType', 'week', 'homeTeam', 'awayTeam']]
    assert not required.isna().any().any(), \
        "Nulls in required non-score fields."
    assert not games['neutralSite'].isna().any(), \
        "neutralSite should be non-null after dropping canceled games."

    # Numeric week, with every postseason game collapsed onto week 18.
    is_postseason = games['seasonType'].eq('postseason')
    week = pd.to_numeric(games['week'], errors='raise', downcast='integer')
    week = week.mask(is_postseason, 18).astype('int16')
    assert (week >= 1).all(), "week must be >= 1"

    # Drop down to numpy for the arithmetic.
    home_pts = pd.to_numeric(games['homePoints'], errors='raise').to_numpy()
    away_pts = pd.to_numeric(games['awayPoints'], errors='raise').to_numpy()
    neutral = games['neutralSite'].astype(bool).to_numpy()
    home_team = games['homeTeam'].to_numpy(object)
    away_team = games['awayTeam'].to_numpy(object)

    # Raw margin from the home side; a zero would mean a tie.
    home_margin = home_pts - away_pts
    assert not (home_margin == 0).any(), "Unexpected tie in completed FBS game."

    # Take the home-field bonus away from the home team unless the site is neutral.
    home_adjusted = home_margin - np.where(neutral, 0, hfa)

    # Flip the sign for away wins so the margin is always from the winner's side.
    home_won = home_margin > 0
    winner_sign = np.where(home_won, 1, -1)

    columns = {
        'week': week.to_numpy(),
        'winner': np.where(home_won, home_team, away_team),
        'loser': np.where(home_won, away_team, home_team),
        'hfa_margin': winner_sign * home_adjusted,
    }
    return pd.DataFrame(columns)


In [None]:
def add_weight(df: pd.DataFrame, decay: float = 1/3) -> pd.DataFrame:
    """
    Attach a normalized per-game weight based on team game counts and recency.

    For each game (row):
        total_games = games played by the winner + games played by the loser,
                      both counted within ``df``
        weeks_ago   = (max week in df + 1) - week      (so always >= 1)
        raw weight  = sqrt((total_games / max(total_games)) / weeks_ago ** decay)
    Raw weights are then rescaled so that they sum to 100.

    Args:
        df: DataFrame with columns ['week', 'winner', 'loser', 'hfa_margin'].
            It is not modified.
        decay: Time decay exponent for recency weighting (default: 1/3).

    Returns:
        New DataFrame with columns
        ['week', 'winner', 'loser', 'hfa_margin', 'weight'].
        An empty input yields an empty frame with a float64 'weight' column.

    Raises:
        AssertionError: if ``decay`` <= 0, if 'winner'/'loser' contain nulls,
            or if any week is < 1.
    """
    out_cols = ['week', 'winner', 'loser', 'hfa_margin', 'weight']

    # --- fail-fast checks ---
    assert decay > 0, "decay must be > 0"

    # Empty schedule (e.g. no completed games yet): nothing to weight.
    # This must come before the data checks; previously an
    # `assert len(df) > 0` made this branch unreachable.
    if df.empty:
        return df.assign(weight=pd.Series(dtype='float64'))[out_cols]

    assert not df[['winner','loser']].isna().any().any(), "winner/loser must be non-null"
    assert (df['week'] >= 1).all(), "week must be >= 1"

    # Extract numpy arrays once to minimize pandas overhead
    winner_vals = df['winner'].to_numpy()
    loser_vals = df['loser'].to_numpy()
    week_vals = df['week'].to_numpy()

    # Encode every team name as an integer code; winners occupy the first
    # n positions of `codes`, losers the last n.
    n = len(df)
    codes, _ = pd.factorize(np.concatenate([winner_vals, loser_vals]), sort=False)

    # Games played per team, then looked up for each row's winner and loser.
    counts = np.bincount(codes)
    total_games = counts[codes[:n]] + counts[codes[n:]]

    # Most recent week gets weeks_ago == 1, so the division below is safe.
    weeks_ago = (week_vals.max() + 1) - week_vals

    # Every row contributes its own two teams to `counts`, so total_games >= 2
    # and max() is strictly positive for a non-empty frame.
    weights = np.sqrt((total_games / total_games.max()) / (weeks_ago ** decay))
    # Normalize to sum to 100
    weights *= (100.0 / weights.sum())

    result = df[['week', 'winner', 'loser', 'hfa_margin']].copy()
    result['weight'] = weights
    return result

In [None]:
# Flatten the API response into one row per completed game
# (week, winner, loser, HFA-adjusted margin).
df_raw = prepare_schedule(api_response)

# Keep the weighted schedule under a name so later cells can reuse it
# instead of recomputing; the bare last expression still renders it.
df_weighted = add_weight(df_raw)
df_weighted