In [8]:
import pandas as pd

# Files and cup winners.
# Maps a season label to (schedule CSV path, abbreviation of that season's
# cup winner). The winner is None when it is not known; the processing loop
# then sets cup_winner to None for every team of that season.
# NOTE(review): the 2020-2021 entry reads "nhl_schedule_2021_2021.csv" while
# the other files are named <start>_<end> -- confirm this is the real file
# name and not a typo.
season_files = {
    "2020-2021": ("nhl_schedule_2021_2021.csv", "TBL"),
    "2021-2022": ("nhl_schedule_2021_2022.csv", "COL"),
    "2022-2023": ("nhl_schedule_2022_2023.csv", "VGK"),
    "2023-2024": ("nhl_schedule_2023_2024.csv", "FLA"),
    "2024-2025": ("nhl_schedule_2024_2025.csv", None)  # unknown winner
}

# Function to compute Elo
def calculate_elo(games_df, base_elo=1500, k=20, home_adv=35):
    """Compute Elo ratings for every team appearing in ``games_df``.

    Games are processed in row order, so the caller is responsible for
    passing them in the order they should be applied (presumably
    chronological). Every team starts at ``base_elo``. After each game with
    a recorded score the winner gains, and the loser loses, the same whole
    number of points, so the sum of all ratings is preserved. Rows with a
    missing score, and games where both scores are equal, leave the ratings
    untouched.

    Args:
        games_df: DataFrame with ``home_team``, ``away_team``,
            ``home_score`` and ``away_score`` columns.
        base_elo: Starting rating assigned to every team.
        k: K-factor; upper bound on the rating change of a single game.
        home_adv: Points added to the home team's rating when computing the
            expected result. It is never stored in the ratings themselves.

    Returns:
        Dict mapping each team to its final rating.
    """

    def expected(ra, rb):
        """Return the expected score of a side rated ``ra`` against ``rb``."""
        return 1 / (1 + 10 ** ((rb - ra) / 400))

    teams = pd.concat([games_df["home_team"], games_df["away_team"]]).unique()
    elo = {team: base_elo for team in teams}

    # Zipping the four needed columns avoids building a Series per row, which
    # is what DataFrame.iterrows() does.
    games = zip(
        games_df["home_team"],
        games_df["away_team"],
        games_df["home_score"],
        games_df["away_score"],
    )
    for home, away, hs, as_ in games:
        if pd.isna(hs) or pd.isna(as_):
            continue

        exp_home = expected(elo[home] + home_adv, elo[away])

        # Changes are rounded to whole points; the built-in round() rounds
        # exact halves to the nearest even integer.
        if hs > as_:
            change = round(k * (1 - exp_home))
            elo[home] += change
            elo[away] -= change
        elif as_ > hs:
            change = round(k * exp_home)
            elo[home] -= change
            elo[away] += change

    return elo

# Per-season summary frames, concatenated into a single table at the end.
all_seasons = []

# Process each season: aggregate per-team totals from the schedule CSV,
# attach Elo ratings and label the cup winner.
for season, (file, winner) in season_files.items():
    df = pd.read_csv(file)

    # Running totals per team. Teams are inserted in order of first
    # appearance, which fixes the row order of the season table.
    summary = {}
    for home, away, hs, as_, gt in zip(
        df["home_team"], df["away_team"],
        df["home_score"], df["away_score"], df["game_type"],
    ):
        # Rows without both scores are skipped (presumably games that have
        # not been played yet).
        if pd.isna(hs) or pd.isna(as_):
            continue

        hs, as_ = int(hs), int(as_)

        for team in (home, away):
            if team not in summary:
                summary[team] = {
                    "team": team, "season": season, "games_played": 0, "playoff_games": 0,
                    "wins": 0, "losses": 0, "goals_for": 0, "goals_against": 0
                }

        home_stats, away_stats = summary[home], summary[away]

        # Update stats
        home_stats["games_played"] += 1
        away_stats["games_played"] += 1

        home_stats["goals_for"] += hs
        home_stats["goals_against"] += as_
        away_stats["goals_for"] += as_
        away_stats["goals_against"] += hs

        # game_type 3 is counted as a playoff game.
        if gt == 3:
            home_stats["playoff_games"] += 1
            away_stats["playoff_games"] += 1

        # Equal scores count as neither a win nor a loss.
        if hs > as_:
            home_stats["wins"] += 1
            away_stats["losses"] += 1
        elif as_ > hs:
            away_stats["wins"] += 1
            home_stats["losses"] += 1

    # A file with no completed games yields an empty summary; building the
    # derived columns below would then fail with KeyError, so skip the season.
    if not summary:
        print(f"Skipping season {season}: no completed games in '{file}'")
        continue

    # Convert summary to DataFrame
    season_df = pd.DataFrame.from_dict(summary, orient="index")
    season_df["goal_diff"] = season_df["goals_for"] - season_df["goals_against"]
    season_df["win_pct"] = season_df["wins"] / season_df["games_played"]

    # Compute Elo over every game in the file and attach it per team.
    elo_scores = calculate_elo(df)
    elo_df = pd.DataFrame(list(elo_scores.items()), columns=["team", "elo"])
    season_df = season_df.merge(elo_df, on="team", how="left")

    # Add cup_winner column: 1 for the winner, 0 for everyone else, and None
    # for every team when the season's winner is not known.
    if winner:
        season_df["cup_winner"] = season_df["team"].apply(
            lambda name: 1 if name == winner else 0
        )
    else:
        season_df["cup_winner"] = None

    all_seasons.append(season_df)

# Combine all seasons
combined_df = pd.concat(all_seasons, ignore_index=True)
combined_df.to_csv("multi_season_team_summary.csv", index=False)
print("✅ Saved to 'multi_season_team_summary.csv'")


✅ Saved to 'multi_season_team_summary.csv'
