In [13]:
# -------------------------------
# IIR Normalized ELO Notebook
# -------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import os
from collections import defaultdict
from PIL import Image


In [17]:
# Load and normalize data
# Known spelling variants, grouped by the canonical name they should map to.
# The comprehension flattens this into a plain {variant: canonical} lookup.
TEAM_NAME_CORRECTIONS = {
    variant: canonical
    for canonical, variants in (
        ("Lakeside (Seattle)", ("Lakeside", "Lakeside (Sea)")),
        ("Seattle Prep", ("Seattle Prep.",)),
    )
    for variant in variants
}


def normalize_team_name(name):
    """Return the canonical spelling of a team name.

    Names listed in ``TEAM_NAME_CORRECTIONS`` are replaced by their canonical
    form; any other value is returned unchanged.
    """
    if name in TEAM_NAME_CORRECTIONS:
        return TEAM_NAME_CORRECTIONS[name]
    return name

# Load the raw match results and canonicalise the team names on both sides.
matches_df = pd.read_csv(r"C:\Users\User\OneDrive - 2020 Companies\Desktop\Ingraham Boys Soccer\metroleague_soccer_results_filtered.csv")
for side in ("Home", "Away"):
    matches_df[f"{side} Team"] = matches_df[f"{side} Team"].apply(normalize_team_name)

# Load the per-season school classification lookup and canonicalise its names too.
class_df = pd.read_csv(r"C:\Users\User\OneDrive - 2020 Companies\Desktop\Ingraham Boys Soccer\school_classification_by_season.csv")
class_df["School"] = class_df["School"].apply(normalize_team_name)
# Normalising names can collapse two spellings of one school into the same
# (Season, School) key. Duplicate keys on the right side of a left merge repeat
# the matching left rows, which would make the same match appear (and later be
# rated) more than once. Keep the first row per key so the lookup is many-to-one.
# NOTE(review): if duplicates carry conflicting classifications, the first wins — confirm.
class_df = class_df.drop_duplicates(subset=["Season", "School"])

# Attach each side's classification for the match's season.
# Teams without a lookup row keep NaN because of how="left".
for side in ("Home", "Away"):
    matches_df = (
        matches_df
        .merge(class_df, left_on=["Season", f"{side} Team"], right_on=["Season", "School"], how="left")
        .rename(columns={"Classification": f"{side} Classification"})
        .drop(columns=["School"])
    )

# Coerce scores to numbers; anything unparseable becomes NaN and the match is dropped.
for score_col in ("Home Score", "Away Score"):
    matches_df[score_col] = pd.to_numeric(matches_df[score_col], errors="coerce")
matches_df = matches_df.dropna(subset=["Home Score", "Away Score"])

# Order chronologically within each season. Dates are parsed into a temporary
# datetime column (unparseable dates become NaT) that is removed after sorting,
# so the original "Date" column is left exactly as loaded.
matches_df = (
    matches_df
    .assign(Date_temp=pd.to_datetime(matches_df["Date"], errors="coerce"))
    .sort_values(by=["Season", "Date_temp"])
    .reset_index(drop=True)
    .drop(columns="Date_temp")
)

# Assign per-team cumulative match numbers
# Walk the matches in their current row order, keeping one running tally per
# team (home and away appearances share the same tally). The tally is recorded
# after both teams of the row have been incremented.
team_match_counter = defaultdict(int)
home_match_counts = []
away_match_counts = []
for home_team, away_team in zip(matches_df["Home Team"], matches_df["Away Team"]):
    team_match_counter[home_team] += 1
    team_match_counter[away_team] += 1
    home_match_counts.append(team_match_counter[home_team])
    away_match_counts.append(team_match_counter[away_team])

# Store each side's running match number alongside the match itself.
matches_df["Home Match #"] = home_match_counts
matches_df["Away Match #"] = away_match_counts

# Filter teams by minimum games played
# A team must have at least this many matches (home + away) within a season
# for its matches in that season to be kept.
min_games = 3

# Per-season appearance counts for each side, keyed on a shared "Team" column.
home_games = (
    matches_df.groupby(["Season", "Home Team"])
    .size()
    .reset_index(name="Home Games")
    .rename(columns={"Home Team": "Team"})
)
away_games = (
    matches_df.groupby(["Season", "Away Team"])
    .size()
    .reset_index(name="Away Games")
    .rename(columns={"Away Team": "Team"})
)

# Outer join so teams that only ever played home (or only away) are still counted;
# the missing side is filled with 0 before summing.
games_played = home_games.merge(away_games, on=["Season", "Team"], how="outer").fillna(0)
games_played["Total Games"] = games_played["Home Games"] + games_played["Away Games"]

valid_teams = games_played.loc[games_played["Total Games"] >= min_games, ["Season", "Team"]]

# Keep a match only when BOTH of its teams qualify in that season: an inner join
# against the qualifying (Season, Team) pairs is applied once per side.
for team_col in ("Home Team", "Away Team"):
    matches_df = matches_df.merge(
        valid_teams,
        left_on=["Season", team_col],
        right_on=["Season", "Team"],
        how="inner",
    ).drop(columns=["Team"])


In [34]:
# Run ELO with IIR normalization
def run_elo_iir(matches, base_elo=1500, k=40, hfa=100, cap_margin=3, upset_multiplier=1.5, n=3):
    """Replay matches in row order and compute Elo ratings with per-season regression.

    Parameters
    ----------
    matches : pandas.DataFrame
        One row per match. Must provide "Season", "Home Team", "Away Team",
        "Home Score", "Away Score", "Home Match #" and "Away Match #".
        A "Match #" column is copied to the log when present. Rows are
        processed in the order given, so the caller is responsible for sorting.
    base_elo : number
        Rating given to a team the first time it appears, and the value that
        ratings are pulled toward at every season change.
    k : number
        Base update factor.
    hfa : number
        Rating points added to the home side when computing the expected result.
    cap_margin : int
        Upper bound on the goal-difference multiplier (the lower bound is 1).
    upset_multiplier : number
        Scales how much a draw's update grows with its distance from the
        expected result. Only used for draws.
    n : number
        Regression strength: whenever the season value changes, each known
        rating becomes ``(n - 1) / n * rating + 1 / n * base_elo``.

    Returns
    -------
    (pandas.DataFrame, collections.defaultdict)
        The per-match log (one row per input row, always with the same 14
        columns, even for empty input) and the final rating of every team seen.
    """
    # Fixed schema so an empty input still yields a well-formed (header-only) log.
    log_columns = [
        "Season", "Match #", "Home Team", "Away Team",
        "Home Score", "Away Score",
        "Home ELO Before", "Away ELO Before",
        "Home ELO After", "Away ELO After",
        "Home ELO Change", "Away ELO Change",
        "Home Match #", "Away Match #",
    ]

    team_elos = defaultdict(lambda: base_elo)
    elo_log = []
    last_season = None

    for _, row in matches.iterrows():
        season = row["Season"]
        home = row["Home Team"]
        away = row["Away Team"]
        hs = int(row["Home Score"])
        as_ = int(row["Away Score"])

        # Season boundary: pull every already-rated team part of the way back
        # toward base_elo. On the very first row no team is rated yet, so this
        # is a no-op there.
        if season != last_season:
            for team in team_elos:
                team_elos[team] = ((n - 1) / n) * team_elos[team] + (1 / n) * base_elo
            last_season = season

        # Actual result from the home side's perspective: win 1, draw 0.5, loss 0.
        result_home = 1 if hs > as_ else 0 if hs < as_ else 0.5
        home_elo = team_elos[home]
        away_elo = team_elos[away]
        # Standard logistic Elo expectation, with the home side boosted by hfa.
        expected_home = 1 / (1 + 10 ** ((away_elo - (home_elo + hfa)) / 400))
        # Goal-difference multiplier, clamped to [1, cap_margin]; draws get 1.
        margin = max(1, min(abs(hs - as_), cap_margin))

        if result_home == 0.5:
            # Draws move ratings at half weight, plus extra in proportion to how
            # far the draw was from the expected outcome.
            surprise = abs(result_home - expected_home)
            k_adjust = 0.5 + (upset_multiplier * surprise)
        else:
            k_adjust = 1.0

        # Zero-sum update: whatever the home side gains, the away side loses.
        change_home = k_adjust * k * margin * (result_home - expected_home)
        change_away = -change_home

        team_elos[home] += change_home
        team_elos[away] += change_away

        elo_log.append({
            "Season": season,
            "Match #": row["Match #"] if "Match #" in row else None,
            "Home Team": home,
            "Away Team": away,
            "Home Score": hs,
            "Away Score": as_,
            "Home ELO Before": home_elo,
            "Away ELO Before": away_elo,
            "Home ELO After": team_elos[home],
            "Away ELO After": team_elos[away],
            "Home ELO Change": change_home,
            "Away ELO Change": change_away,
            "Home Match #": row["Home Match #"],
            "Away Match #": row["Away Match #"]
        })

    return pd.DataFrame(elo_log, columns=log_columns), team_elos

# Rate every remaining match with the default model settings, then write the
# per-match log to CSV (without the DataFrame index column).
elo_log_df, final_elos = run_elo_iir(matches=matches_df)
elo_log_df.to_csv(
    path_or_buf=r"C:\Users\User\OneDrive - 2020 Companies\Desktop\Ingraham Boys Soccer\elo_iir_log.csv",
    index=False,
)
