# CFP Committee Simulator - Ranking Algorithms

This notebook implements the core ranking algorithms: Colley Matrix, Massey Ratings, and Elo System


In [1]:
# Cell 1: Setup
import pandas as pd
import numpy as np
from scipy import linalg
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Load cached data (with fallback to CSV if parquet not available)
def load_cached_games(year, week):
    """Load cached games, trying parquet first, then CSV"""
    cache_dir = f'./data/cache/{year}'
    parquet_path = f'{cache_dir}/games_w{week}.parquet'
    csv_path = f'{cache_dir}/games_w{week}.csv'
    
    # Try parquet first
    if os.path.exists(parquet_path):
        try:
            return pd.read_parquet(parquet_path)
        except (ImportError, ModuleNotFoundError):
            print("⚠️  Parquet not available, trying CSV...")
    
    # Fallback to CSV
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)
    
    raise FileNotFoundError(f"No cached data found for year {year}, week {week}")

year = 2025  # 2025-2026 season
week = 15
games_df = load_cached_games(year, week)
print(f"✅ Loaded {len(games_df)} FBS games for {year} season, week {week}")

✅ Loaded 557 FBS games for 2025 season, week 15


In [2]:
# Cell 2: Colley Matrix Implementation
class ColleyMatrix:
    """Resume-based ranking using only wins/losses"""
    
    def __init__(self, games_df):
        self.games = games_df
        self.teams = sorted(list(set(
            games_df['home_team'].unique().tolist() + 
            games_df['away_team'].unique().tolist()
        )))
        self.n_teams = len(self.teams)
        self.team_idx = {team: i for i, team in enumerate(self.teams)}
        
    def build_system(self):
        """Build Colley matrix C and vector b"""
        # Initialize matrix and vector
        C = np.zeros((self.n_teams, self.n_teams))
        b = np.ones(self.n_teams)
        
        # Count games and calculate records
        for _, game in self.games.iterrows():
            home_idx = self.team_idx[game['home_team']]
            away_idx = self.team_idx[game['away_team']]
            
            # Update diagonal (games played + 2)
            C[home_idx, home_idx] += 1
            C[away_idx, away_idx] += 1
            
            # Update off-diagonal (negative games between teams)
            C[home_idx, away_idx] -= 1
            C[away_idx, home_idx] -= 1
            
            # Update b vector based on wins/losses
            if game['home_score'] > game['away_score']:
                b[home_idx] += 0.5
                b[away_idx] -= 0.5
            else:
                b[home_idx] -= 0.5
                b[away_idx] += 0.5
        
        # Add 2 to diagonal (Laplace rule)
        np.fill_diagonal(C, C.diagonal() + 2)
        
        return C, b
    
    def solve(self):
        """Solve Cr = b for ratings"""
        C, b = self.build_system()
        ratings = linalg.solve(C, b)
        
        # Create results dataframe
        results = pd.DataFrame({
            'team': self.teams,
            'colley_rating': ratings
        }).sort_values('colley_rating', ascending=False)
        
        return results

# Run Colley rankings
colley = ColleyMatrix(games_df)
colley_rankings = colley.solve()
print("Top 10 Colley Rankings:")
colley_rankings.head(10)



Top 10 Colley Rankings:


Unnamed: 0,team,colley_rating
79,Notre Dame,0.881386
45,James Madison,0.877468
2,Alabama,0.863446
81,Ohio State,0.857013
10,BYU,0.832218
86,Oregon,0.832145
34,Georgia,0.831865
41,Indiana,0.811482
85,Ole Miss,0.810566
123,Utah,0.791056


In [3]:
# Cell 3: Massey Ratings Implementation
class MasseyRatings:
    """Predictive power rating using margin of victory"""
    
    def __init__(self, games_df, mov_cap=28, hfa_adjustment=3.75):
        self.games = games_df.copy()
        self.mov_cap = mov_cap
        self.hfa = hfa_adjustment
        self.teams = sorted(list(set(
            games_df['home_team'].unique().tolist() + 
            games_df['away_team'].unique().tolist()
        )))
        self.n_teams = len(self.teams)
        self.team_idx = {team: i for i, team in enumerate(self.teams)}
        
    def apply_adjustments(self):
        """Apply MOV cap and home field adjustment"""
        # Calculate margins
        self.games['margin'] = self.games['home_score'] - self.games['away_score']
        
        # Apply MOV cap
        self.games['adj_margin'] = self.games['margin'].clip(-self.mov_cap, self.mov_cap)
        
        # Adjust for home field advantage (unless neutral site)
        self.games.loc[~self.games['neutral_site'], 'adj_margin'] -= self.hfa
        
    def build_system(self):
        """Build Massey matrix M and vector p"""
        self.apply_adjustments()
        
        M = np.zeros((self.n_teams, self.n_teams))
        p = np.zeros(self.n_teams)
        
        for _, game in self.games.iterrows():
            home_idx = self.team_idx[game['home_team']]
            away_idx = self.team_idx[game['away_team']]
            margin = game['adj_margin']
            
            # Update diagonal (games played)
            M[home_idx, home_idx] += 1
            M[away_idx, away_idx] += 1
            
            # Update off-diagonal
            M[home_idx, away_idx] -= 1
            M[away_idx, home_idx] -= 1
            
            # Update point differential vector
            p[home_idx] += margin
            p[away_idx] -= margin
        
        # Replace last row to break singularity (sum of ratings = 0)
        M[-1, :] = 1
        p[-1] = 0
        
        return M, p
    
    def solve(self):
        """Solve Mr = p for ratings using least squares for robustness"""
        M, p = self.build_system()
        try:
            ratings = linalg.solve(M, p)
        except np.linalg.LinAlgError:
            # Fallback to least squares if matrix is singular
            ratings, residuals, rank, s = linalg.lstsq(M, p, rcond=None)
            if rank < self.n_teams:
                print(f"⚠️  Warning: Massey matrix rank {rank} < {self.n_teams} teams. Using least squares solution.")
        
        results = pd.DataFrame({
            'team': self.teams,
            'massey_rating': ratings
        }).sort_values('massey_rating', ascending=False)
        
        return results

# Run Massey ratings
massey = MasseyRatings(games_df)
massey_rankings = massey.solve()
print("Top 10 Massey Rankings:")
massey_rankings.head(10)


Top 10 Massey Rankings:


  -4.75 -31.75  10.25  11.25  16.25   7.25  21.25   8.25  -8.75   4.25
   1.25 -31.75 -21.75  -6.75  -6.75   2.25  24.25  -0.75  -7.75 -21.75
  14.25 -20.75 -12.75 -10.75   4.25  10.25 -31.75  21.25  -8.75  -5.75
  13.25  -2.75  24.25 -20.75  24.25  -9.75  -6.75  18.25  -0.75 -22.75
  -6.75  23.25  24.25  -6.75   3.25  17.25  10.25 -19.75  24.25 -31.75
   4.25   2.25  -0.75  17.25  -2.75 -21.75  10.25   2.25 -10.75 -21.75
  24.25   2.25  -6.75   1.25   9.25   0.25 -10.75  -7.75  12.25 -14.75
  -2.75  24.25   4.25  17.25  24.25  24.25  -2.75   7.25  24.25  -9.75
 -17.75 -27.75   3.25  18.25  10.25  24.25  -9.75 -10.75  24.25  -0.75
 -27.75  -5.75 -14.75  24.25   3.25  -6.75 -30.75  15.25  24.25 -25.75
  13.25 -28.75   5.25  -6.75 -21.75  -6.75 -16.75   6.25   1.25  20.25
  -0.75   2.25  24.25  24.25 -24.75 -28.75  -6.75  20.25 -13.75   3.25
   9.25  24.25  -4.75  11.25  -0.75  -4.75  -0.75  16.25  13.25  11.25
 -31.75   3.25 -13.75 -31.75   3.25  14.25  24.25  21.25   6.25  -9.75
 -10.7

Unnamed: 0,team,massey_rating
81,Ohio State,30.147924
41,Indiana,26.460613
79,Notre Dame,24.662294
86,Oregon,24.058307
99,South Florida,21.392476
42,Iowa,19.193937
60,Miami,18.258618
120,USC,17.275692
76,North Texas,16.321349
93,SMU,16.117171


In [4]:
# Cell 4: Elo System Implementation
class EloRatings:
    """Dynamic rating system that updates game-by-game"""
    
    def __init__(self, k_factor=20, hfa=55, season_regression=0.95):
        self.k = k_factor
        self.hfa = hfa
        self.regression = season_regression
        self.ratings = {}
        
    def initialize_ratings(self, teams, prev_ratings=None):
        """Initialize team ratings"""
        if prev_ratings:
            # Regress 95% toward mean (1500)
            for team in teams:
                if team in prev_ratings:
                    self.ratings[team] = 1500 + self.regression * (prev_ratings[team] - 1500)
                else:
                    self.ratings[team] = 1500
        else:
            self.ratings = {team: 1500 for team in teams}
    
    def expected_score(self, rating_a, rating_b):
        """Calculate expected win probability"""
        return 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    
    def update_game(self, home_team, away_team, home_won):
        """Update ratings based on game result"""
        # Apply home field advantage
        home_rating = self.ratings[home_team] + self.hfa
        away_rating = self.ratings[away_team]
        
        # Calculate expected scores
        home_expected = self.expected_score(home_rating, away_rating)
        away_expected = 1 - home_expected
        
        # Calculate actual scores (1 for win, 0 for loss)
        home_actual = 1 if home_won else 0
        away_actual = 1 - home_actual
        
        # Update ratings
        self.ratings[home_team] += self.k * (home_actual - home_expected)
        self.ratings[away_team] += self.k * (away_actual - away_expected)
    
    def process_season(self, games_df):
        """Process all games chronologically"""
        # Get unique teams
        teams = set(games_df['home_team'].unique()) | set(games_df['away_team'].unique())
        self.initialize_ratings(teams)
        
        # Sort games by date/week
        games_sorted = games_df.sort_values(['week', 'game_id'])
        
        # Process each game
        for _, game in games_sorted.iterrows():
            home_won = game['home_score'] > game['away_score']
            self.update_game(game['home_team'], game['away_team'], home_won)
        
        # Return final ratings
        results = pd.DataFrame([
            {'team': team, 'elo_rating': rating} 
            for team, rating in self.ratings.items()
        ]).sort_values('elo_rating', ascending=False)
        
        return results

# Run Elo ratings
elo = EloRatings()
elo_rankings = elo.process_season(games_df)
print("Top 10 Elo Rankings:")
elo_rankings.head(10)


Top 10 Elo Rankings:


Unnamed: 0,team,elo_rating
54,Ohio State,1580.851835
80,James Madison,1578.80976
16,Notre Dame,1578.062849
111,Indiana,1571.241767
129,BYU,1563.93011
87,Georgia,1563.119871
66,Utah,1557.006208
13,Oregon,1556.678122
62,Kennesaw State,1554.957248
7,Texas Tech,1554.588433


In [5]:
# Cell 5: Save Rankings
colley_rankings.to_csv('colley_rankings.csv', index=False)
massey_rankings.to_csv('massey_rankings.csv', index=False)
elo_rankings.to_csv('elo_rankings.csv', index=False)

print("✅ Rankings saved to CSV files")


✅ Rankings saved to CSV files
