In [11]:
import pandas as pd
import numpy as np

# Tiny synthetic dataset (team vs opponent matches)
matches = pd.DataFrame({
    "season": ["2021/22"]*4 + ["2022/23"]*4,
    "team": ["Man City","Real Madrid","Chelsea","PSG",
             "Man City","Real Madrid","Chelsea","PSG"],
    "opponent": ["PSG","Chelsea","Bayern","Liverpool",
                 "PSG","Chelsea","Bayern","Liverpool"],
    "goals_for": [3,2,1,2, 4,1,2,3],
    "goals_against": [1,1,2,3, 2,2,1,1],
})

print("\nRaw matches:\n", matches)

# 1. Pivot: average goals_for vs opponent
matrix = matches.pivot_table(
    values="goals_for",
    index="team",
    columns="opponent",
    aggfunc="mean",
    fill_value=0
)

print("\nPivot (avg goals vs opponent):\n", matrix)


# 2. Stack: push opponent back into rows
stacked = matrix.stack().reset_index(name="avg_goals")
print("\nStacked back to long:\n", stacked.head())

# 3. Unstack: flip team/opponent axes
unstacked = stacked.set_index(["team","opponent"]).unstack("opponent")
print("\nUnstacked back to wide:\n", unstacked.head())

matrix_against = matches.pivot_table(
    values="goals_against",
    index="team",
    columns="opponent",
    aggfunc="mean",
    fill_value=0
)
print("\nPivot (avg goals against vs opponent):\n", matrix_against)

#stack
stacked_against = matrix_against.stack().reset_index(name="avg_goals_against")
print("\nAgainst Stacked back to long:\n", stacked_against.head())

#unstack
unstacked_against = stacked_against.set_index(["team","opponent"]).unstack("opponent")
print("\nAgainst Unstacked back to wide:\n", unstacked_against.head())

# Start from stacked (team, opponent, avg_goals)
best_vs = (
    stacked.sort_values(["team","avg_goals"], ascending=[True,False])
           .groupby("team", group_keys=False)
           .head(1)
)

print("\nOpponent each team scores most against:\n", best_vs)

worst_against = (
    stacked_against.sort_values(["opponent","avg_goals_against"], ascending=[True,False])
                   .groupby("opponent", group_keys=False)
                   .head(1)
)

print("\nTeam conceding most vs each opponent:\n", worst_against)

ratio_matrix = matrix / matrix_against.replace(0, np.nan)

print("\nGoal ratio (for/against):\n", ratio_matrix)

# Interpretation drill:
# >1 → team usually outscores opponent
# =1 → balanced
# <1 → team usually concedes more than it scores







Raw matches:
     season         team   opponent  goals_for  goals_against
0  2021/22     Man City        PSG          3              1
1  2021/22  Real Madrid    Chelsea          2              1
2  2021/22      Chelsea     Bayern          1              2
3  2021/22          PSG  Liverpool          2              3
4  2022/23     Man City        PSG          4              2
5  2022/23  Real Madrid    Chelsea          1              2
6  2022/23      Chelsea     Bayern          2              1
7  2022/23          PSG  Liverpool          3              1

Pivot (avg goals vs opponent):
 opponent     Bayern  Chelsea  Liverpool  PSG
team                                        
Chelsea         1.5      0.0        0.0  0.0
Man City        0.0      0.0        0.0  3.5
PSG             0.0      0.0        2.5  0.0
Real Madrid     0.0      1.5        0.0  0.0

Stacked back to long:
        team   opponent  avg_goals
0   Chelsea     Bayern        1.5
1   Chelsea    Chelsea        0.0
2   Che