In [15]:
import pandas as pd
import numpy as np

# Console display limits so wide / long frames print readably.
pd.options.display.width = 140
pd.options.display.max_rows = 50

# Toy match table: six fixtures per season, two seasons, one row per
# (season, team) from that team's point of view.
matches = pd.DataFrame(
    {
        "season": [s for s in ("2021/22", "2022/23") for _ in range(6)],
        "stage": 2 * ["Group", "Group", "Quarter", "Quarter", "Semi", "Semi"],
        "team": 2 * ["Man City", "Real Madrid", "PSG", "AC Milan", "Chelsea", "Bayern"],
        "opponent": 2 * ["PSG", "Chelsea", "Bayern", "Real Madrid", "Man City", "Liverpool"],
        "goals_for": [3, 2, 2, 0, 1, 2] + [4, 1, 3, 1, 2, 1],
        "goals_against": [1, 1, 1, 2, 0, 3] + [2, 2, 1, 1, 3, 2],
        "shots": [10, 8, 9, 6, 11, 7] + [12, 6, 10, 7, 8, 6],
        "xg": [2.1, 1.4, 1.5, 0.6, 1.1, 0.8] + [2.6, 0.9, 1.9, 0.7, 1.2, 0.9],
    }
)

# One summary row per (season, team); as_index=False keeps the group keys
# as ordinary columns, so no reset_index() is needed afterwards.
team_summary = matches.groupby(["season", "team"], as_index=False).agg(
    games=("goals_for", "count"),      # matches played
    gf_sum=("goals_for", "sum"),       # total goals scored
    ga_sum=("goals_against", "sum"),   # total goals conceded
    gf_avg=("goals_for", "mean"),      # mean goals scored per match
    xg_avg=("xg", "mean"),             # mean expected goals per match
)
print(team_summary.head())

# Row-level normalisation: each match's goals_for divided by that team's
# mean goals_for within the same season. A team whose mean is 0 yields
# 0 / 0 = NaN for its rows.
matches["gf_vs_team_avg"] = matches["goals_for"].div(
    matches.groupby(["season", "team"])["goals_for"].transform("mean")
)
print(matches.loc[:, ["season", "team", "goals_for", "gf_vs_team_avg"]].head())

def points_from_results(group):
    """Summarise one group of match rows into games, points and goal totals.

    Points follow Win=3, Draw=1, Loss=0, decided by comparing the
    ``goals_for`` and ``goals_against`` columns row by row.

    Args:
        group: DataFrame holding at least ``goals_for`` and
            ``goals_against`` columns.

    Returns:
        A Series with the entries ``games`` (row count), ``points``,
        ``gf`` (sum of goals_for) and ``ga`` (sum of goals_against),
        in that order.
    """
    scored = group["goals_for"]
    conceded = group["goals_against"]

    # Counting the boolean masks replaces a per-row points array:
    # every win contributes 3, every draw 1, everything else 0.
    n_wins = (scored > conceded).sum()
    n_draws = (scored == conceded).sum()

    return pd.Series({
        "games": len(group),
        "points": int(3 * n_wins + n_draws),
        "gf": int(scored.sum()),
        "ga": int(conceded.sum()),
    })

# Per-(season, team) standings built with the custom aggregator.
# The two goal columns are selected explicitly before .apply(): the
# aggregator only reads goals_for / goals_against, and letting apply()
# operate on the grouping columns themselves is deprecated in recent
# pandas releases (it emits a warning pointing at this call).
standings_apply = (matches
                   .groupby(["season", "team"])[["goals_for", "goals_against"]]
                   .apply(points_from_results)
                   .reset_index()
                   .assign(gd=lambda d: d["gf"] - d["ga"]))   # goal difference
print(standings_apply)

# Order once and reuse: within each season, best first by points,
# then goal difference, then goals scored.
_standings_sorted = standings_apply.sort_values(
    ["season", "points", "gd", "gf"],
    ascending=[True, False, False, False],
)

# Rank = 1-based position of each row inside its season, in sorted order.
standings_ranked = _standings_sorted.assign(
    rank=lambda d: d.groupby("season").cumcount() + 1
)
print(standings_ranked)

# First two rows of every season from the same ordering.
top2 = _standings_sorted.groupby("season", group_keys=False).head(2)

print(top2)

# Lookup table: one row per club with its country and stadium.
teams_meta = pd.DataFrame(
    [
        ("Man City", "England", "Etihad"),
        ("Real Madrid", "Spain", "Bernabéu"),
        ("PSG", "France", "Parc des Princes"),
        ("AC Milan", "Italy", "San Siro"),
        ("Chelsea", "England", "Stamford Bridge"),
        ("Bayern", "Germany", "Allianz Arena"),
    ],
    columns=["team", "country", "stadium"],
)

# Lookup table: a toy weight per competition stage (used later to weight
# points by round).
stages_meta = pd.DataFrame(
    [("Group", 1), ("Quarter", 2), ("Semi", 3), ("Final", 4)],
    columns=["stage", "round_weight"],
)

# Attach country/stadium for the row's own team. The left join keeps every
# match row even when a team has no metadata entry.
matches_team = pd.merge(matches, teams_meta, how="left", on="team")
print(matches_team.head())

# To describe both sides of a match, the metadata is joined twice with
# different column prefixes so the two copies do not collide.
# 1) team side -> team_team / team_country / team_stadium
left = pd.merge(
    matches,
    teams_meta.rename(columns=lambda col: f"team_{col}"),
    how="left",
    left_on="team",
    right_on="team_team",
)
# 2) opponent side -> opp_team / opp_country / opp_stadium
full = pd.merge(
    left,
    teams_meta.rename(columns=lambda col: f"opp_{col}"),
    how="left",
    left_on="opponent",
    right_on="opp_team",
)

# Keep the match facts plus the four descriptive columns; the prefixed
# join keys (team_team, opp_team) are dropped by not being listed here.
full = full.loc[:, [
    "season", "stage", "team", "opponent",
    "goals_for", "goals_against", "shots", "xg",
    "team_country", "team_stadium", "opp_country", "opp_stadium",
]]
print(full.head())

# Attach the stage weight to every match row.
matches_stage = pd.merge(matches, stages_meta, how="left", on="stage")
print(matches_stage.loc[:, ["season", "stage", "round_weight"]].head())

# Per-match points: 3 for a win, 1 for a draw, 0 for a loss.
pts = np.where(
    matches["goals_for"] > matches["goals_against"],
    3,
    np.where(matches["goals_for"] == matches["goals_against"], 1, 0),
)
with_pts = matches.assign(points=pts)

# Bring in the stage weight; a stage missing from stages_meta falls back
# to a weight of 1.
with_w = pd.merge(with_pts, stages_meta, how="left", on="stage").fillna(
    value={"round_weight": 1}
)

# Toy formula: a match's points multiplied by the weight of its stage.
with_w["weighted_points"] = with_w["points"].mul(with_w["round_weight"])

# Aggregate per (season, team), add goal difference, then order each
# season by weighted points, goal difference and goals scored (all
# descending within the season).
weighted_standings = (
    with_w
    .groupby(["season", "team"], as_index=False)
    .agg(
        games=("goals_for", "count"),
        points=("points", "sum"),
        w_points=("weighted_points", "sum"),
        gf=("goals_for", "sum"),
        ga=("goals_against", "sum"),
    )
    .assign(gd=lambda d: d["gf"].sub(d["ga"]))
    .sort_values(
        by=["season", "w_points", "gd", "gf"],
        ascending=[True, False, False, False],
    )
)
print(weighted_standings)







    season      team  games  gf_sum  ga_sum  gf_avg  xg_avg
0  2021/22  AC Milan      1       0       2     0.0     0.6
1  2021/22    Bayern      1       2       3     2.0     0.8
2  2021/22   Chelsea      1       1       0     1.0     1.1
3  2021/22  Man City      1       3       1     3.0     2.1
4  2021/22       PSG      1       2       1     2.0     1.5
    season         team  goals_for  gf_vs_team_avg
0  2021/22     Man City          3             1.0
1  2021/22  Real Madrid          2             1.0
2  2021/22          PSG          2             1.0
3  2021/22     AC Milan          0             NaN
4  2021/22      Chelsea          1             1.0
     season         team  games  points  gf  ga  gd
0   2021/22     AC Milan      1       0   0   2  -2
1   2021/22       Bayern      1       0   2   3  -1
2   2021/22      Chelsea      1       3   1   0   1
3   2021/22     Man City      1       3   3   1   2
4   2021/22          PSG      1       3   2   1   1
5   2021/22  Real Madr

  .apply(points_from_results)
