# Line Stats Pipeline

Build line-level stint results and summaries from stint data.

In [8]:
import pandas as pd
from collections import defaultdict

DATA_DIR = "data/"
STINT_CSV = DATA_DIR + "stint_data.csv"

HOME_COLS = ["home1", "home2", "home3", "home4"]
AWAY_COLS = ["away1", "away2", "away3", "away4"]

# Normalize settings: tweak to align with player_stats.ipynb behavior if needed.
EXPECTED_GD_FACTOR = 0.1
NORMALIZE_SCALE = 40.0


## Section 1: Helper Functions


In [9]:
def compute_team_ranks(stints: pd.DataFrame) -> dict:
    game_results = stints.groupby("game_id").agg(
        {
            "h_goals": "sum",
            "a_goals": "sum",
            "h_team": "first",
            "a_team": "first",
        }
    ).reset_index()

    team_stats = defaultdict(lambda: {"wins": 0, "draws": 0, "losses": 0, "points": 0, "gd": 0})

    for _, row in game_results.iterrows():
        h_team = row["h_team"]
        a_team = row["a_team"]
        gd = row["h_goals"] - row["a_goals"]

        if gd > 0:
            team_stats[h_team]["wins"] += 1
            team_stats[h_team]["points"] += 3
            team_stats[a_team]["losses"] += 1
        elif gd < 0:
            team_stats[a_team]["wins"] += 1
            team_stats[a_team]["points"] += 3
            team_stats[h_team]["losses"] += 1
        else:
            team_stats[h_team]["draws"] += 1
            team_stats[h_team]["points"] += 1
            team_stats[a_team]["draws"] += 1
            team_stats[a_team]["points"] += 1

        team_stats[h_team]["gd"] += gd
        team_stats[a_team]["gd"] -= gd

    team_df = pd.DataFrame.from_dict(team_stats, orient="index")
    team_df = team_df.sort_values(["points", "gd"], ascending=[False, False])
    team_rank = {team: rank + 1 for rank, team in enumerate(team_df.index)}
    return team_rank


def add_stint_numbers(stints: pd.DataFrame) -> pd.DataFrame:
    out = stints.copy()
    out["original_order"] = range(len(out))
    out = out.sort_values(["game_id", "original_order"])
    out["stint_number"] = out.groupby("game_id").cumcount() + 1
    return out


def lineup_id(players: list[str]) -> str:
    return "|".join(sorted(players))


def compute_normalized_gd(gd: float, team_strength: float, opp_strength: float) -> float:
    expected_gd = (team_strength - opp_strength) * EXPECTED_GD_FACTOR
    return (gd - expected_gd) * NORMALIZE_SCALE


## Section 2: Build Line Stints and Summaries


In [10]:
def build_line_stints(stints: pd.DataFrame) -> pd.DataFrame:
    team_rank = compute_team_ranks(stints)
    max_rank = max(team_rank.values())
    team_strength = {team: (max_rank + 1 - rank) for team, rank in team_rank.items()}

    rows = []
    for _, row in stints.iterrows():
        h_team = row["h_team"]
        a_team = row["a_team"]
        h_players = [row[c] for c in HOME_COLS]
        a_players = [row[c] for c in AWAY_COLS]
        minutes = float(row["minutes"])

        h_gd = float(row["h_goals"] - row["a_goals"])
        a_gd = -h_gd

        h_norm = compute_normalized_gd(h_gd, team_strength[h_team], team_strength[a_team])
        a_norm = compute_normalized_gd(a_gd, team_strength[a_team], team_strength[h_team])

        rows.append(
            {
                "game_id": row["game_id"],
                "stint_number": row["stint_number"],
                "team": h_team,
                "opponent": a_team,
                "lineup": lineup_id(h_players),
                "minutes": minutes,
                "goals_for": row["h_goals"],
                "goals_against": row["a_goals"],
                "gd": h_gd,
                "normalized_gd": h_norm,
            }
        )
        rows.append(
            {
                "game_id": row["game_id"],
                "stint_number": row["stint_number"],
                "team": a_team,
                "opponent": h_team,
                "lineup": lineup_id(a_players),
                "minutes": minutes,
                "goals_for": row["a_goals"],
                "goals_against": row["h_goals"],
                "gd": a_gd,
                "normalized_gd": a_norm,
            }
        )

    return pd.DataFrame(rows)


def build_line_stats(line_stints: pd.DataFrame) -> pd.DataFrame:
    grouped = line_stints.groupby(["team", "lineup"], as_index=False)

    line_stats = grouped.agg(
        goals_for=("goals_for", "sum"),
        goals_against=("goals_against", "sum"),
        minutes=("minutes", "sum"),
        stints=("stint_number", "count"),
        games=("game_id", "nunique"),
        avg_normalized_gd=("normalized_gd", "mean"),
    )

    return line_stats


def build_best_lines_by_stint(line_stints: pd.DataFrame) -> pd.DataFrame:
    def weighted_norm_gd(df: pd.DataFrame) -> float:
        weights = df["minutes"].clip(lower=1e-6)
        return float((df["normalized_gd"] * weights).sum() / weights.sum())

    grouped = line_stints.groupby(["team", "stint_number", "lineup"], as_index=False)
    perf = grouped.apply(
        lambda df: pd.Series(
            {
                "stints": len(df),
                "minutes": df["minutes"].sum(),
                "avg_normalized_gd": weighted_norm_gd(df),
            }
        )
    ).reset_index()

    perf = perf.sort_values(["team", "stint_number", "avg_normalized_gd"], ascending=[True, True, False])
    best = perf.groupby(["team", "stint_number"], as_index=False).first()
    return best


## Section 3: Load Stint Data


In [11]:
stints = pd.read_csv(STINT_CSV)
stints = add_stint_numbers(stints)

print(f"Loaded {len(stints)} stints")


Loaded 7448 stints


## Section 4: Compute Outputs


In [12]:
TEAM = "Canada"

line_stints = build_line_stints(stints)
line_stats = build_line_stats(line_stints)
best_lines = build_best_lines_by_stint(line_stints)

best_lines_team = best_lines[best_lines["team"].str.lower() == TEAM.lower()].sort_values("stint_number")

print(f"Computed line stints and best lines; found {len(best_lines_team)} stints for {TEAM}")


Computed line stints and best lines; found 14 stints for Canada


  perf = grouped.apply(


## Section 5: Suggested Lines for Canada


In [13]:
# Display suggested best lines for the selected team
if best_lines_team.empty:
    print(f"No best lines found for {TEAM}")
else:
    print(f"Suggested lines for {TEAM} by stint:")
    for _, row in best_lines_team.iterrows():
        print(
            f"Stint {int(row['stint_number'])}: {row['lineup']} "
            f"(minutes={row['minutes']:.1f}, avg_norm_gd={row['avg_normalized_gd']:.2f})"
        )

# Display the filtered dataframe for convenience
best_lines_team


Suggested lines for Canada by stint:
Stint 1: Canada_p2|Canada_p3|Canada_p8|Canada_p9 (minutes=4.2, avg_norm_gd=228.00)
Stint 2: Canada_p10|Canada_p12|Canada_p3|Canada_p5 (minutes=8.7, avg_norm_gd=388.92)
Stint 3: Canada_p10|Canada_p11|Canada_p12|Canada_p5 (minutes=3.3, avg_norm_gd=380.00)
Stint 4: Canada_p11|Canada_p2|Canada_p8|Canada_p9 (minutes=5.0, avg_norm_gd=536.00)
Stint 5: Canada_p1|Canada_p11|Canada_p2|Canada_p9 (minutes=6.0, avg_norm_gd=324.00)
Stint 6: Canada_p11|Canada_p2|Canada_p3|Canada_p5 (minutes=4.4, avg_norm_gd=448.00)
Stint 7: Canada_p2|Canada_p5|Canada_p6|Canada_p9 (minutes=4.9, avg_norm_gd=344.00)
Stint 8: Canada_p10|Canada_p2|Canada_p5|Canada_p8 (minutes=5.9, avg_norm_gd=336.00)
Stint 9: Canada_p1|Canada_p11|Canada_p12|Canada_p3 (minutes=5.1, avg_norm_gd=388.00)
Stint 10: Canada_p11|Canada_p2|Canada_p5|Canada_p9 (minutes=3.6, avg_norm_gd=364.00)
Stint 11: Canada_p10|Canada_p11|Canada_p3|Canada_p9 (minutes=5.3, avg_norm_gd=652.00)
Stint 12: Canada_p10|Canada_p3|Can

Unnamed: 0,team,stint_number,index,lineup,stints,minutes,avg_normalized_gd
32,Canada,1,1870,Canada_p2|Canada_p3|Canada_p8|Canada_p9,1.0,4.229611,228.0
33,Canada,2,1886,Canada_p10|Canada_p12|Canada_p3|Canada_p5,2.0,8.727888,388.924809
34,Canada,3,1953,Canada_p10|Canada_p11|Canada_p12|Canada_p5,1.0,3.260257,380.0
35,Canada,4,2057,Canada_p11|Canada_p2|Canada_p8|Canada_p9,1.0,4.980291,536.0
36,Canada,5,2158,Canada_p1|Canada_p11|Canada_p2|Canada_p9,1.0,5.991005,324.0
37,Canada,6,2204,Canada_p11|Canada_p2|Canada_p3|Canada_p5,1.0,4.42057,448.0
38,Canada,7,2324,Canada_p2|Canada_p5|Canada_p6|Canada_p9,1.0,4.903212,344.0
39,Canada,8,2338,Canada_p10|Canada_p2|Canada_p5|Canada_p8,1.0,5.937336,336.0
40,Canada,9,2462,Canada_p1|Canada_p11|Canada_p12|Canada_p3,1.0,5.067793,388.0
41,Canada,10,2516,Canada_p11|Canada_p2|Canada_p5|Canada_p9,1.0,3.625924,364.0
