In [10]:
import pandas as pd
import numpy as np

# Practice cell: groupby agg vs. transform, per-group ranking, per-team rolling
# windows, and exploding a list column into player-level stats.

# Widen console output so the printed frames below are not wrapped.
pd.set_option("display.width", 140)
pd.set_option("display.max_rows", 50)

# Tiny dataset: add "scorer" per goal to practice player stats.
# Each "scorers" list holds one name per goal, so its length equals goals_for.
matches = pd.DataFrame({
    "match_id": np.arange(1, 9),
    "season": ["2021/22"]*4 + ["2022/23"]*4,
    "stage": ["Group", "Group", "Quarter", "Semi"]*2,
    "team": ["Man City", "Real Madrid", "Chelsea", "PSG",
             "Man City", "Real Madrid", "Chelsea", "PSG"],
    "opponent": ["PSG", "Chelsea", "Bayern", "Liverpool",
                 "PSG", "Chelsea", "Bayern", "Liverpool"],
    "goals_for": [3, 2, 1, 2, 4, 1, 2, 3],
    "goals_against": [1, 1, 2, 3, 2, 2, 1, 1],
    "scorers": [
        ["De Bruyne", "Mahrez", "Mahrez"],
        ["Benzema", "Vinicius"],
        ["Havertz"],
        ["Messi", "Mbappe"],
        ["Haaland", "Haaland", "KDB", "Foden"],
        ["Benzema"],
        ["Mount", "Havertz"],
        ["Mbappe", "Mbappe", "Messi"],
    ],
})

print("\nRaw matches:\n", matches.head())

# Team averages (agg) vs row-level normalized values (transform).
# agg collapses to one row per team; transform returns one value per input row.
team_stats = matches.groupby("team").agg(
    avg_gf=("goals_for", "mean"),
    avg_ga=("goals_against", "mean"),
)
matches["gf_vs_team_avg"] = (
    matches.groupby("team")["goals_for"].transform(lambda s: s / s.mean())
)

print("\nTeam-level averages:\n", team_stats)
print("\nRow-level GF vs team average:\n", matches[["team", "goals_for", "gf_vs_team_avg"]])

# Dense rank of goals_for inside each (season, stage); 1 = most goals.
# No pre-sort is needed: rank() does not depend on row order and its result is
# indexed like `matches`, so the assignment lines up row by row.
matches["rank_in_stage"] = (
    matches.groupby(["season", "stage"])["goals_for"]
           .rank(method="dense", ascending=False)
)

print("\nRanking per (season, stage):\n", matches[["season", "stage", "team", "goals_for", "rank_in_stage"]])

# Sort by season+team (simulate timeline)
matches = matches.sort_values(["season", "team"]).reset_index(drop=True)

# 3-match rolling avg of goals_for (per team).
# transform() returns a Series indexed like its input, so the values align
# back onto `matches` by row label without any index-level manipulation.
matches["gf_roll3"] = (
    matches.sort_values(["team", "season"])               # ensure order
           .groupby("team")["goals_for"]
           .transform(lambda s: s.rolling(window=3, min_periods=1).mean())
)

print("\nRolling avg goals_for (last 3 matches):\n", matches[["team", "goals_for", "gf_roll3"]])

# Expand scorers list → one row per goal scorer
scorer_long = matches.explode("scorers")

# Count goals per (season, scorer): one row per pair, count in "goals".
# Names are counted exactly as spelled, so "De Bruyne" and "KDB" are tallied
# separately (presumably the same player; normalize upstream if so).
top_scorers = (
    scorer_long.groupby(["season", "scorers"])
               .size()
               .reset_index(name="goals")
)
# Dense ranking keeps ties together, so more than two players can have rank <= 2.
top_scorers["rank_in_season"] = (
    top_scorers.groupby("season")["goals"].rank(method="dense", ascending=False)
)
top2_scorers = top_scorers[top_scorers["rank_in_season"] <= 2]

print("\nTop scorers per season:\n", top2_scorers)

# Rolling 3-match points per team (win = 3, draw = 1, loss = 0).
# The points are computed by a callable inside assign(), i.e. on the frame as
# it exists at that step of the chain. Passing a bare ndarray built from the
# unsorted `matches` would be assigned by position and end up on the wrong rows.
form3 = (
    matches.sort_values(["team", "season"])
           .assign(points=lambda d: np.select(
               [d["goals_for"] > d["goals_against"],
                d["goals_for"] == d["goals_against"]],
               [3, 1],
               default=0,
           ))
           .groupby("team")["points"]
           .transform(lambda s: s.rolling(3, min_periods=1).sum())
)
matches["points_roll3"] = form3

print("\nTop scorers per season:\n", top2_scorers.sort_values(["season", "goals"], ascending=[True, False]))
print("\nTeam rolling GF (last 3):\n", matches[["team", "goals_for", "gf_roll3"]].head(12))




Raw matches:
    match_id   season    stage         team   opponent  goals_for  goals_against                         scorers
0         1  2021/22    Group     Man City        PSG          3              1     [De Bruyne, Mahrez, Mahrez]
1         2  2021/22    Group  Real Madrid    Chelsea          2              1             [Benzema, Vinicius]
2         3  2021/22  Quarter      Chelsea     Bayern          1              2                       [Havertz]
3         4  2021/22     Semi          PSG  Liverpool          2              3                 [Messi, Mbappe]
4         5  2022/23    Group     Man City        PSG          4              2  [Haaland, Haaland, KDB, Foden]

Team-level averages:
              avg_gf  avg_ga
team                       
Chelsea         1.5     1.5
Man City        3.5     1.5
PSG             2.5     2.0
Real Madrid     1.5     1.5

Row-level GF vs team average:
           team  goals_for  gf_vs_team_avg
0     Man City          3        0.857143
1  Rea