In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import pickle
import os

Matplotlib is building the font cache; this may take a moment.


In [None]:
# Global plot styling for every figure created after this cell runs.
# NOTE(review): the 'seaborn-v0_8-*' style names are only shipped by newer
# matplotlib releases — confirm the environment's matplotlib version.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("notebook", font_scale=1.2)

# Tunable model constants.
# K_FACTOR, INITIAL_ELO and REGRESSION_FACTOR are bound as default argument
# values when the definitions cell runs, so re-run that cell after editing them.
K_FACTOR = 32  # Upper bound on the rating change per game before margin-of-victory scaling (can be adjusted)
HOME_ADVANTAGE = 100  # ELO points added to the home team's rating when computing the expected result (can be adjusted)
INITIAL_ELO = 1500  # Starting ELO for all teams; also the mean that regress_ratings pulls toward
REGRESSION_FACTOR = 0.75  # Fraction of a team's distance from INITIAL_ELO kept between seasons; 1.0 means no regression (can be adjusted)

In [None]:

# Function to calculate expected win probability based on ELO difference
def expected_win_probability(team1_elo, team2_elo):
    """
    Return the probability that team 1 beats team 2 under the ELO model.

    The logistic ELO curve is used:
        E = 1 / (1 + 10^((team2_elo - team1_elo) / 400))
    so equal ratings give 0.5 and a 400-point edge gives roughly 0.91.

    Args:
        team1_elo (float): ELO rating of team 1
        team2_elo (float): ELO rating of team 2

    Returns:
        float: Expected win probability for team 1
    """
    # Positive gap means team 2 is the stronger side.
    rating_gap = team2_elo - team1_elo
    odds_against = 10 ** (rating_gap / 400)
    return 1 / (1 + odds_against)

# Function to update ELO ratings after a game
def update_elo(team1_elo, team2_elo, team1_win, k_factor=K_FACTOR, home_team=None):
    """
    Compute both teams' post-game ELO ratings.

    The home side (if any) receives HOME_ADVANTAGE points for the purpose of
    the expectation only; the returned ratings are built from the unadjusted
    inputs.

    Args:
        team1_elo (float): Current ELO rating of team 1
        team2_elo (float): Current ELO rating of team 2
        team1_win (bool): True if team 1 won, False if team 2 won
        k_factor (float): K-factor determining rating change magnitude
        home_team (int, optional): 1 if team1 is home, 2 if team2 is home,
            None if neutral site. Any other value is treated like None.

    Returns:
        tuple: Updated ELO ratings for both teams (team1_new_elo, team2_new_elo)
    """
    # Ratings used only to compute the expectation.
    effective_1, effective_2 = team1_elo, team2_elo
    if home_team == 1:
        effective_1 += HOME_ADVANTAGE
    elif home_team == 2:
        effective_2 += HOME_ADVANTAGE

    # Expected scores for each side (they sum to 1).
    expected_1 = expected_win_probability(effective_1, effective_2)
    expected_2 = 1 - expected_1

    # Actual scores: 1 for the winner, 0 for the loser.
    score_1 = int(bool(team1_win))
    score_2 = 1 - score_1

    # Each side moves by K times its surprise (actual minus expected).
    delta_1 = k_factor * (score_1 - expected_1)
    delta_2 = k_factor * (score_2 - expected_2)

    return team1_elo + delta_1, team2_elo + delta_2

# Optional: Function to adjust K-factor based on margin of victory
def margin_of_victory_k(base_k, margin, winner_elo, loser_elo):
    """
    Scale the K-factor when the winner beat the margin implied by the ratings.

    The implied margin is (winner_elo - loser_elo) / 25. If the real margin
    does not exceed it, the base K is returned unchanged. Otherwise K is
    multiplied by margin / implied margin, capped at 2.5; when the implied
    margin is zero or negative (even match-up or upset) a flat 1.5 is used.

    Args:
        base_k (float): Base K-factor
        margin (int): Point difference in the game
        winner_elo (float): ELO rating of winning team
        loser_elo (float): ELO rating of losing team

    Returns:
        float: Adjusted K-factor
    """
    # Rough points-per-ELO conversion; this is a simplification.
    projected_margin = (winner_elo - loser_elo) / 25

    beat_projection = margin > projected_margin
    if not beat_projection:
        return base_k

    if projected_margin > 0:
        # Cap the bonus to prevent extreme swings.
        multiplier = min(margin / projected_margin, 2.5)
    else:
        multiplier = 1.5
    return base_k * multiplier

# Function to apply season-to-season regression
def regress_ratings(ratings_dict, regression_factor=REGRESSION_FACTOR):
    """
    Pull every rating part of the way back toward INITIAL_ELO.

    Each team keeps `regression_factor` of its distance from INITIAL_ELO, so
    1.0 leaves ratings untouched and 0.0 resets everyone to INITIAL_ELO.
    The input dictionary is not modified.

    Args:
        ratings_dict (dict): Dictionary of team ratings
        regression_factor (float): Factor determining how much ratings regress to mean

    Returns:
        dict: Dictionary with regressed ratings
    """
    return {
        name: INITIAL_ELO + regression_factor * (current - INITIAL_ELO)
        for name, current in ratings_dict.items()
    }

# Class to manage ELO ratings for all teams
class EloRatingSystem:
    """
    Keep current ELO ratings and a per-team rating history.

    Attributes:
        ratings (dict): Maps team -> current rating.
        history (dict): Maps team -> list of (timestamp, rating) tuples, in the
            order the entries were recorded.
        initial_elo (float): Rating assigned to a team the first time it is
            seen, and the value end_season() regresses toward.
    """

    def __init__(self, initial_elo=INITIAL_ELO):
        self.ratings = {}  # Dictionary to store team ratings
        self.history = {}  # Dictionary to store rating history
        self.initial_elo = initial_elo

    @staticmethod
    def _timestamp(date):
        """Return the caller-supplied date, or the current time when none was given."""
        return datetime.now() if date is None else date

    def _record(self, team, rating, when):
        """Append a (timestamp, rating) entry, creating the team's history list if needed."""
        # setdefault guards against state where a rating exists without a
        # history list (e.g. a hand-edited or partially loaded ratings file).
        self.history.setdefault(team, []).append((when, rating))

    def get_rating(self, team, date=None):
        """
        Get current rating for a team, or assign initial rating if not present.

        Args:
            team: Team identifier (any hashable value).
            date (optional): Timestamp stored with the initial history entry
                when the team is first seen. Defaults to the current time.

        Returns:
            The team's current rating.
        """
        if team not in self.ratings:
            self.ratings[team] = self.initial_elo
            self._record(team, self.initial_elo, self._timestamp(date))
        return self.ratings[team]

    def update_ratings(self, team1, team2, team1_win, k_factor=K_FACTOR, home_team=None, margin=None, date=None):
        """
        Update ratings after a game.

        Args:
            team1, team2: Team identifiers; unseen teams start at initial_elo.
            team1_win (bool): True if team1 won, False if team2 won.
            k_factor (float): Base K-factor for this game.
            home_team (int, optional): 1 if team1 is home, 2 if team2 is home,
                None for a neutral site.
            margin (optional): Point difference; when given, the K-factor is
                adjusted through margin_of_victory_k.
            date (optional): Timestamp stored in the history for this game
                (e.g. the real game date when replaying past results).
                Defaults to the current time.
        """
        # One timestamp per game so both teams' entries line up.
        when = self._timestamp(date)
        team1_elo = self.get_rating(team1, when)
        team2_elo = self.get_rating(team2, when)

        # Optionally adjust K-factor based on margin of victory
        if margin is not None:
            if team1_win:
                winner_elo, loser_elo = team1_elo, team2_elo
            else:
                winner_elo, loser_elo = team2_elo, team1_elo
            k_factor = margin_of_victory_k(k_factor, margin, winner_elo, loser_elo)

        # Update ratings
        team1_new_elo, team2_new_elo = update_elo(team1_elo, team2_elo, team1_win, k_factor, home_team)

        # Store new ratings
        self.ratings[team1] = team1_new_elo
        self.ratings[team2] = team2_new_elo

        # Update history
        self._record(team1, team1_new_elo, when)
        self._record(team2, team2_new_elo, when)

    def end_season(self, regression_factor=REGRESSION_FACTOR, date=None):
        """
        Apply end-of-season regression to all ratings.

        Ratings are pulled toward this instance's initial_elo, so a system
        created with a non-default starting rating regresses toward its own
        baseline rather than the module-wide constant.

        Args:
            regression_factor (float): Fraction of each team's distance from
                initial_elo that is kept.
            date (optional): Timestamp stored with the regressed ratings.
                Defaults to the current time.
        """
        baseline = self.initial_elo
        self.ratings = {
            team: baseline + regression_factor * (rating - baseline)
            for team, rating in self.ratings.items()
        }

        # Update history with regressed ratings
        when = self._timestamp(date)
        for team, rating in self.ratings.items():
            self._record(team, rating, when)

    def save_ratings(self, filename='bradyball_elo_ratings.pkl'):
        """Save current ratings and history to a pickle file."""
        with open(filename, 'wb') as f:
            pickle.dump({
                'ratings': self.ratings,
                'history': self.history
            }, f)

    def load_ratings(self, filename='bradyball_elo_ratings.pkl'):
        """
        Load ratings and history from a file written by save_ratings.

        Returns:
            bool: True if the file existed and was loaded, False otherwise.
        """
        if os.path.exists(filename):
            # SECURITY: pickle.load can execute arbitrary code; only load
            # files that this notebook (or another trusted source) produced.
            with open(filename, 'rb') as f:
                data = pickle.load(f)
                self.ratings = data['ratings']
                self.history = data['history']
            return True
        return False

    def get_top_teams(self, n=25):
        """Get the top N teams by ELO rating as a list of (team, rating) tuples."""
        sorted_teams = sorted(self.ratings.items(), key=lambda x: x[1], reverse=True)
        return sorted_teams[:n]

    def plot_rating_history(self, teams, title="ELO Rating History"):
        """
        Plot rating history for selected teams.

        Teams without any recorded history are skipped. Returns the pyplot
        module so the caller can call .show() or .savefig() on it.
        """
        plt.figure(figsize=(12, 8))

        for team in teams:
            if team in self.history:
                dates, ratings = zip(*self.history[team])
                plt.plot(dates, ratings, marker='o', linestyle='-', label=team)

        plt.title(title)
        plt.xlabel('Date')
        plt.ylabel('ELO Rating')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()

        return plt

# Demo: Process a CSV file of NCAA basketball game results
def process_games_from_csv(csv_file, elo_system=None):
    """
    Process games from a CSV file in date order and update ELO ratings.

    Expected CSV format:
    Date,Home,Away,HomeScore,AwayScore,NeutralSite
    2023-11-06,Duke,Kentucky,69,65,0

    The NeutralSite column is optional; when it is absent, or blank for a
    row, the game is treated as a true home game. Rows with a missing
    HomeScore or AwayScore (e.g. games not yet played) are skipped. Games
    on the same date are processed in file order.

    Args:
        csv_file: Anything pd.read_csv accepts (path or file-like object).
        elo_system (optional): Rating system to update; a new
            EloRatingSystem is created when omitted.

    Returns:
        The rating system that was updated.

    Raises:
        ValueError: If any required column is missing.
    """
    if elo_system is None:
        elo_system = EloRatingSystem()

    df = pd.read_csv(csv_file)

    # Ensure necessary columns exist
    required_cols = ['Date', 'Home', 'Away', 'HomeScore', 'AwayScore']
    if not all(col in df.columns for col in required_cols):
        raise ValueError(f"CSV must contain columns: {required_cols}")

    # Process games chronologically. A stable sort keeps same-day games in
    # file order, so the resulting ratings do not depend on sort internals.
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.sort_values('Date', kind='mergesort')

    has_neutral_flag = 'NeutralSite' in df.columns

    for _, game in df.iterrows():
        home_score = game['HomeScore']
        away_score = game['AwayScore']

        # Without both scores there is no result to rate; comparing NaN
        # would otherwise silently count the game as an away win.
        if pd.isna(home_score) or pd.isna(away_score):
            continue

        # Determine if game is at a neutral site. A blank cell is read as
        # NaN, and bool(NaN) is True, so missing flags are handled explicitly.
        neutral_site = False
        if has_neutral_flag:
            flag = game['NeutralSite']
            neutral_site = False if pd.isna(flag) else bool(flag)

        # Home team is always passed as team1, so it is "1" unless neutral.
        home_team_id = None if neutral_site else 1

        # Determine winner and margin. Equal scores give home_win == False,
        # i.e. a tie is recorded as an away win with margin 0.
        home_win = bool(home_score > away_score)
        margin = abs(home_score - away_score)

        # Update ratings
        elo_system.update_ratings(
            team1=game['Home'],
            team2=game['Away'],
            team1_win=home_win,
            home_team=home_team_id,
            margin=margin
        )

    return elo_system

In [None]:

# Example usage
if __name__ == "__main__":
    # Create new ELO rating system (starts with no teams; they are added on first use)
    ncaa_elo = EloRatingSystem()
    
    # The steps below are a template and stay commented out because they need
    # a real game-results CSV. Uncomment them in order once one is available.
    
    # Load game data from CSV (you'd need to replace with your actual data file)
    # ncaa_elo = process_games_from_csv('ncaa_games_2023_2024.csv', ncaa_elo)
    
    # Show top teams (highest rating first)
    # top_teams = ncaa_elo.get_top_teams(25)
    # for i, (team, rating) in enumerate(top_teams, 1):
    #     print(f"{i}. {team}: {rating:.1f}")
    
    # Save ratings and history to a pickle file
    # ncaa_elo.save_ratings('bradyball_elo_final.pkl')
    
    # Plot history for top teams (depends on top_teams from the step above)
    # top_team_names = [team for team, _ in top_teams[:5]]
    # ncaa_elo.plot_rating_history(top_team_names)
    # plt.show()