In [2]:
# -------------------------------
# FIR Normalized ELO Notebook with Enhanced Date Processing
# -------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import os
from collections import defaultdict
from PIL import Image
# Import enhanced date processing utilities
from date_utils import enhance_match_data_processing, DateProcessor
# Import enhanced ELO processing utilities
from elo_utils import create_enhanced_elo_processor


In [3]:
# Load and normalize data
# Known spelling variants of a school name, mapped to the single canonical
# spelling used for every lookup and merge in this notebook.
TEAM_NAME_CORRECTIONS = {
    "Lakeside": "Lakeside (Seattle)",
    "Lakeside (Sea)": "Lakeside (Seattle)",
    "Seattle Prep.": "Seattle Prep",
}

def normalize_team_name(name):
    """Return the canonical spelling of a team name.

    String inputs are stripped of leading/trailing whitespace first, so a value
    such as ``"Lakeside "`` still hits the correction table and still matches
    the same team in later (Season, team) merges. Names without an entry in
    ``TEAM_NAME_CORRECTIONS`` are returned stripped but otherwise unchanged.

    Non-string inputs (for example NaN or None from an empty CSV cell) are
    returned as-is rather than raising.
    """
    if not isinstance(name, str):
        return name
    cleaned = name.strip()
    return TEAM_NAME_CORRECTIONS.get(cleaned, cleaned)

# Directory holding the input CSVs. Defaults to the original local folder; set
# the METRO_SOCCER_DATA_DIR environment variable to run the notebook elsewhere.
DATA_DIR = os.environ.get(
    "METRO_SOCCER_DATA_DIR",
    r"C:\Users\User\OneDrive - 2020 Companies\Desktop\Ingraham Boys Soccer",
)

# Match results, with both team columns mapped to canonical names.
matches_df = pd.read_csv(os.path.join(DATA_DIR, "metroleague_soccer_results_filtered.csv"))
matches_df["Home Team"] = matches_df["Home Team"].apply(normalize_team_name)
matches_df["Away Team"] = matches_df["Away Team"].apply(normalize_team_name)

# Per-season school classification, keyed by (Season, School) after normalisation.
class_df = pd.read_csv(os.path.join(DATA_DIR, "school_classification_by_season.csv"))
class_df["School"] = class_df["School"].apply(normalize_team_name)

# Normalisation can collapse two spellings of one school into the same
# (Season, School) key. A repeated key would make the left merges below emit the
# same match once per duplicate, so keep only the first row for each key.
duplicate_keys = class_df.duplicated(subset=["Season", "School"])
if duplicate_keys.any():
    print(f"Dropping {int(duplicate_keys.sum())} duplicate (Season, School) classification rows")
    class_df = class_df[~duplicate_keys]

# Attach each side's classification. Left merges keep matches whose team has no
# classification row (the classification is NaN for those).
matches_df = (
    matches_df
    .merge(class_df, left_on=["Season", "Home Team"], right_on=["Season", "School"], how="left")
    .rename(columns={"Classification": "Home Classification"})
    .drop(columns=["School"])
)
matches_df = (
    matches_df
    .merge(class_df, left_on=["Season", "Away Team"], right_on=["Season", "School"], how="left")
    .rename(columns={"Classification": "Away Classification"})
    .drop(columns=["School"])
)

# Scores that cannot be parsed as numbers become NaN, and those matches are dropped.
matches_df["Home Score"] = pd.to_numeric(matches_df["Home Score"], errors="coerce")
matches_df["Away Score"] = pd.to_numeric(matches_df["Away Score"], errors="coerce")
matches_df = matches_df.dropna(subset=["Home Score", "Away Score"])

# Enhanced date-based sorting and validation using new utilities
# NOTE(review): presumably returns the frame in chronological order, which the
# row-order-dependent steps after this rely on — confirm against date_utils.
print("Applying enhanced date processing for chronological ELO calculations...")
matches_df = enhance_match_data_processing(matches_df)

# Assign per-team cumulative match numbers
# Walk the matches in the frame's current row order, keeping a running count of
# appearances per team (home or away). Each row records how many matches each
# side has played so far, the current match included.
team_match_counter = defaultdict(int)
home_match_counts, away_match_counts = [], []
for home_team, away_team in zip(matches_df["Home Team"], matches_df["Away Team"]):
    # Both counters are bumped before either is read, matching row-by-row order.
    team_match_counter[home_team] += 1
    team_match_counter[away_team] += 1
    home_match_counts.append(team_match_counter[home_team])
    away_match_counts.append(team_match_counter[away_team])

# The lists are positional: one entry per row, in row order.
matches_df["Home Match #"] = home_match_counts
matches_df["Away Match #"] = away_match_counts

# Filter teams by minimum games played
# A team must appear in at least this many matches (home + away) within a season.
min_games = 3

# Count appearances per (Season, team) separately for each side, using a shared
# "Team" column name so the two tallies can be joined.
home_games = (
    matches_df.groupby(["Season", "Home Team"]).size()
    .reset_index(name="Home Games")
    .rename(columns={"Home Team": "Team"})
)
away_games = (
    matches_df.groupby(["Season", "Away Team"]).size()
    .reset_index(name="Away Games")
    .rename(columns={"Away Team": "Team"})
)

# Outer join keeps teams that only ever appear on one side; the missing side counts as 0.
games_played = home_games.merge(away_games, on=["Season", "Team"], how="outer").fillna(0)
games_played["Total Games"] = games_played["Home Games"] + games_played["Away Games"]

meets_minimum = games_played["Total Games"] >= min_games
valid_teams = games_played.loc[meets_minimum, ["Season", "Team"]]

# Inner-join on each side in turn: a match is kept only when both its home and
# away team reach the threshold for that season. Counts are taken once, before
# any match is removed.
for team_column in ("Home Team", "Away Team"):
    matches_df = (
        matches_df
        .merge(valid_teams, left_on=["Season", team_column], right_on=["Season", "Team"], how="inner")
        .drop(columns=["Team"])
    )


In [5]:
# Enhanced ELO calculation with FIR normalization using new utilities
print("=== Enhanced FIR ELO Processing ===")

# Rating-model settings passed straight through to the processor factory.
# NOTE(review): presumably k is the Elo K-factor and hfa the home-field
# advantage in rating points — confirm against elo_utils.
elo_settings = dict(
    base_elo=1500,
    k=40,
    hfa=100,
    cap_margin=3,
    upset_multiplier=1.5,
)
elo_processor = create_enhanced_elo_processor(**elo_settings)

# First pass: compute the rating log and final ratings from the prepared matches.
elo_log_df, final_elos = elo_processor.calculate_elo_enhanced(matches_df)

# Second pass: apply the 'FIR' normalization to both outputs of the first pass.
normalization_mode = 'FIR'
elo_log_df, final_elos = elo_processor.apply_normalization(elo_log_df, final_elos, normalization_mode)

# Save the normalized log as CSV, without the index column.
elo_log_path = r"C:\Users\User\OneDrive - 2020 Companies\Desktop\Ingraham Boys Soccer\elo_fir_log.csv"
elo_log_df.to_csv(elo_log_path, index=False)
