# Bruno Fernandes vs Casemiro: Midfield Analysis (2025–26)

This notebook compares the statistical profiles of Casemiro and Bruno Fernandes during the 2025–26 season under Rúben Amorim’s 3-4-3 system. The goal is to assess midfield balance, attacking vs defensive contribution, and overall efficiency.


In [1]:
import pandas as pd
from pathlib import Path

# Adjust this if your CSVs are in a different folder
DATA_DIR = Path("../data")

files = {
    # Bruno
    "Bruno_Summary_Stats": "Bruno_Summary_Stats.csv",
    "Bruno_Miscellanous_Stats": "Bruno_Miscellanous_Stats.csv",
    "Bruno_Possession_Stats": "Bruno_Possession_Stats.csv",
    "Bruno_Passing_Stats": "Bruno_Passing_Stats.csv",
    "Bruno_Pass_Types_Stats": "Bruno_Pass_Types_Stats.csv",
    "Bruno_Goals_Shots_Stats": "Bruno_Goals_Shots_Stats.csv",
    "Bruno_Defensive_Action_Stats": "Bruno_Defensive_Action_Stats.csv",

    # Casemiro
    "Casemiro_Summary_Stats": "Casemiro_Summary_Stats.csv",
    "Casemiro_Miscellaneous_Stats": "Casemiro_Miscellaneous_Stats.csv",
    "Casemiro_Possesion_Stats": "Casemiro_Possesion_Stats.csv",
    "Casemiro_Passing_Stats": "Casemiro_Passing_Stats.csv",
    "Casemiro_Pass_Types_Stats": "Casemiro_Pass_Types_Stats.csv",
    "Casemiro_Goal_Shots_Stats": "Casemiro_Goal_Shots_Stats.csv",
    "Casemiro_Defensive_Actions_Stats": "Casemiro_Defensive_Actions_Stats.csv",
}

raw = {}
for key, fname in files.items():
    path = DATA_DIR / fname
    raw[key] = pd.read_csv(path)


In [2]:
def clean_fbref_table(df: pd.DataFrame) -> pd.DataFrame:
    """
    Find the row whose first cell is 'Date',
    use that as the header row, and return the cleaned data.
    """
    first_col = df.columns[0]
    header_row_idx = None
    for idx, val in df[first_col].items():
        if isinstance(val, str) and val.strip() == "Date":
            header_row_idx = idx
            break

    if header_row_idx is None:
        raise ValueError("No 'Date' header row found")

    header = df.iloc[header_row_idx]
    data = df.iloc[header_row_idx + 1 :].reset_index(drop=True).copy()
    data.columns = header

    # Drop all-NaN columns and strip whitespace from column names
    data = data.dropna(axis=1, how="all")
    data.columns = [str(c).strip() for c in data.columns]

    return data


cleaned = {name: clean_fbref_table(df) for name, df in raw.items()}


In [3]:
cleaned["Bruno_Summary_Stats"].head()


Unnamed: 0,Date,Day,Comp,Round,Venue,Result,Squad,Opponent,Start,Pos,...,GCA,Cmp,Att,Cmp%,PrgP,Carries,PrgC,Att.1,Succ,Match Report
0,8/17/25,Sun,Premier League,Matchweek 1,Home,L 0–1,Manchester Utd,Arsenal,Y*,CM,...,0.0,49.0,62.0,79.0,10.0,35.0,2.0,2.0,1.0,Match Report
1,8/24/25,Sun,Premier League,Matchweek 2,Away,D 1–1,Manchester Utd,Fulham,Y*,CM,...,0.0,47.0,57.0,82.5,5.0,37.0,1.0,3.0,2.0,Match Report
2,8/27/25,Wed,EFL Cup,Second round,Away,D 2 (11)–2 (12),Manchester Utd,Grimsby Town,N,,...,,,,,,,,,,Match Report
3,8/30/25,Sat,Premier League,Matchweek 3,Home,W 3–2,Manchester Utd,Burnley,Y*,"CM,AM",...,0.0,73.0,90.0,81.1,7.0,60.0,3.0,0.0,0.0,Match Report
4,9/6/25,Sat,WCQ,First round,Away,W 5–0,pt Portugal,am Armenia,Y,CM,...,,,,,,,,,,Match Report


In [4]:
cleaned["Casemiro_Passing_Stats"].head()


Unnamed: 0,Date,Day,Comp,Round,Venue,Result,Squad,Opponent,Start,Pos,...,Cmp%,Ast,xAG,xA,KP,3-Jan,PPA,CrsPA,PrgP,Match Report
0,8/17/25,Sun,Premier League,Matchweek 1,Home,L 0–1,Manchester Utd,Arsenal,Y,CM,...,87.5,0,0.1,0.0,1.0,2.0,0.0,0.0,4.0,Match Report
1,8/24/25,Sun,Premier League,Matchweek 2,Away,D 1–1,Manchester Utd,Fulham,Y,CM,...,50.0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Match Report
2,8/30/25,Sat,Premier League,Matchweek 3,Home,W 3–2,Manchester Utd,Burnley,Y,CM,...,57.1,0,0.0,0.0,0.0,3.0,0.0,0.0,4.0,Match Report
3,9/4/25,Thu,WCQ,WCQ — CONMEBOL (M),Home,W 3–0,br Brazil,cl Chile,Y,DM,...,,0,,,,,,,,Match Report
4,9/14/25,Sun,Premier League,Matchweek 4,Away,L 0–3,Manchester Utd,Manchester City,N,CM,...,100.0,0,0.0,0.0,0.0,3.0,0.0,0.0,2.0,Match Report


In [5]:
JOIN_KEYS = [
    "Date", "Day", "Comp", "Round", "Venue",
    "Result", "Squad", "Opponent", "Start", "Pos", "Min",
]

def make_master(player_prefix: str) -> pd.DataFrame:
    """
    Build a single wide table for one player by merging:
    - Summary
    - Misc
    - Possession
    - Passing
    - Pass types
    - Goals/Shots
    - Defensive actions

    If a metric column (e.g. 'Bruno_Match Report') already exists
    in master, we drop it from the new table to avoid duplicates.
    """
    if player_prefix == "Bruno":
        suffixes = [
            "Summary_Stats",
            "Miscellanous_Stats",
            "Possession_Stats",
            "Passing_Stats",
            "Pass_Types_Stats",
            "Goals_Shots_Stats",
            "Defensive_Action_Stats",
        ]
    else:  # Casemiro
        suffixes = [
            "Summary_Stats",
            "Miscellaneous_Stats",
            "Possesion_Stats",
            "Passing_Stats",
            "Pass_Types_Stats",
            "Goal_Shots_Stats",
            "Defensive_Actions_Stats",
        ]

    # Start from summary table
    master = cleaned[f"{player_prefix}_Summary_Stats"].copy()

    for suf in suffixes[1:]:
        name = f"{player_prefix}_{suf}"
        df = cleaned[name].copy()

        # Keep join keys and metric columns
        metric_cols = [c for c in df.columns if c not in JOIN_KEYS]

        # Rename metric columns with player prefix
        rename_map = {c: f"{player_prefix}_{c}" for c in metric_cols}
        df_renamed = df[JOIN_KEYS + metric_cols].rename(columns=rename_map)

        # Only keep metric columns that are NOT already in master
        metric_prefixed_cols = [rename_map[c] for c in metric_cols]
        new_metric_cols = [c for c in metric_prefixed_cols if c not in master.columns]

        # If there is nothing new, skip this table
        if not new_metric_cols:
            continue

        df_renamed = df_renamed[JOIN_KEYS + new_metric_cols]

        # Merge into master
        master = master.merge(df_renamed, on=JOIN_KEYS, how="left")

    return master


In [6]:
bruno_master = make_master("Bruno")
casemiro_master = make_master("Casemiro")

print(bruno_master.shape)
print(casemiro_master.shape)


(17, 276)
(100, 276)


In [7]:
def is_start_flag(val) -> bool:
    return isinstance(val, str) and "Y" in val

bruno_mu_start = bruno_master[
    (bruno_master["Squad"] == "Manchester Utd") &
    (bruno_master["Start"].apply(is_start_flag))
].copy()

casemiro_mu_start = casemiro_master[
    (casemiro_master["Squad"] == "Manchester Utd") &
    (casemiro_master["Start"].apply(is_start_flag))
].copy()

MATCH_KEYS = ["Date", "Comp", "Round", "Venue", "Result", "Squad", "Opponent"]

both_start_raw = bruno_mu_start.merge(
    casemiro_mu_start,
    on=MATCH_KEYS,
    how="inner",
    suffixes=("_BrunoRow", "_CasemiroRow")
)

print(both_start_raw.shape)
both_start_raw.head()


(9, 545)


Unnamed: 0,Date,Day_BrunoRow,Comp,Round,Venue,Result,Squad,Opponent,Start_BrunoRow,Pos_BrunoRow,...,Casemiro_Tkl,Casemiro_Tkl.1,Casemiro_Tkl.2,Casemiro_Tkl.3,Casemiro_Tkl.4,Casemiro_Tkl%,Casemiro_Pass,Casemiro_Tkl+Int,Casemiro_Clr,Casemiro_Err
0,8/17/25,Sun,Premier League,Matchweek 1,Home,L 0–1,Manchester Utd,Arsenal,Y*,CM,...,3,3,3,3,3,75.0,2,3,0,0
1,8/24/25,Sun,Premier League,Matchweek 2,Away,D 1–1,Manchester Utd,Fulham,Y*,CM,...,1,2,1,2,1,100.0,0,3,0,0
2,8/30/25,Sat,Premier League,Matchweek 3,Home,W 3–2,Manchester Utd,Burnley,Y*,"CM,AM",...,0,1,0,1,0,,1,2,2,0
3,9/20/25,Sat,Premier League,Matchweek 5,Home,W 2–1,Manchester Utd,Chelsea,Y*,CM,...,0,0,0,0,0,,0,0,0,0
4,10/4/25,Sat,Premier League,Matchweek 7,Home,W 2–0,Manchester Utd,Sunderland,Y*,"CM,AM",...,1,3,1,3,1,50.0,1,3,2,0


In [8]:
# We assume you already have these from before:
# bruno_mu_start, casemiro_mu_start

MATCH_KEYS = ["Date", "Comp", "Round", "Venue", "Result", "Squad", "Opponent"]

both_start_raw = bruno_mu_start.merge(
    casemiro_mu_start,
    on=MATCH_KEYS,
    how="inner",
    suffixes=("_BrunoRow", "_CasemiroRow"),
)

# Match metadata
meta_cols = ["Date", "Comp", "Round", "Venue", "Result", "Squad", "Opponent"]

# Minutes columns
minute_cols = ["Min_BrunoRow", "Min_CasemiroRow"]

# All Bruno+Casemiro metric columns
bruno_cols = [c for c in both_start_raw.columns if c.startswith("Bruno_")]
casemiro_cols = [c for c in both_start_raw.columns if c.startswith("Casemiro_")]

combined_cols_order = meta_cols + minute_cols + bruno_cols + casemiro_cols

both_start = both_start_raw[combined_cols_order].copy()
both_start = both_start.sort_values("Date").reset_index(drop=True)

both_start.head()


Unnamed: 0,Date,Comp,Round,Venue,Result,Squad,Opponent,Min_BrunoRow,Min_CasemiroRow,Bruno_CrdY,...,Casemiro_Tkl,Casemiro_Tkl.1,Casemiro_Tkl.2,Casemiro_Tkl.3,Casemiro_Tkl.4,Casemiro_Tkl%,Casemiro_Pass,Casemiro_Tkl+Int,Casemiro_Clr,Casemiro_Err
0,10/19/25,Premier League,Matchweek 8,Away,W 2–1,Manchester Utd,Liverpool,84,58.0,0,...,0.0,3.0,0.0,3.0,0.0,,0.0,3.0,2.0,0.0
1,10/25/25,Premier League,Matchweek 9,Home,W 4–2,Manchester Utd,Brighton,90,69.0,0,...,1.0,3.0,1.0,3.0,1.0,50.0,3.0,5.0,1.0,0.0
2,10/4/25,Premier League,Matchweek 7,Home,W 2–0,Manchester Utd,Sunderland,90,84.0,0,...,1.0,3.0,1.0,3.0,1.0,50.0,1.0,3.0,2.0,0.0
3,11/1/25,Premier League,Matchweek 10,Away,D 2–2,Manchester Utd,Nott'ham Forest,90,90.0,0,...,2.0,3.0,2.0,3.0,2.0,50.0,1.0,4.0,1.0,0.0
4,11/8/25,Premier League,Matchweek 11,Away,D 2–2,Manchester Utd,Tottenham,90,,0,...,,,,,,,,,,


In [9]:
import pandas as pd

both_start = pd.read_csv("../data/midfield_matches_bruno_casemiro_clean.csv")
both_start.head()


Unnamed: 0,Date,Comp,Round,Venue,Result,Squad,Opponent,Bruno_CrdY,Bruno_CrdR,Bruno_2CrdY,...,Casemiro_Tkl.59,Casemiro_Tkl.60,Casemiro_Tkl.61,Casemiro_Tkl.62,Casemiro_Tkl.63,Casemiro_Tkl%,Casemiro_Pass,Casemiro_Tkl+Int,Casemiro_Clr,Casemiro_Err
0,10/19/25,Premier League,Matchweek 8,Away,W 2–1,Manchester Utd,Liverpool,0,0,0,...,0.0,3.0,0.0,3.0,0.0,,0.0,3.0,2.0,0.0
1,10/25/25,Premier League,Matchweek 9,Home,W 4–2,Manchester Utd,Brighton,0,0,0,...,1.0,3.0,1.0,3.0,1.0,50.0,3.0,5.0,1.0,0.0
2,10/4/25,Premier League,Matchweek 7,Home,W 2–0,Manchester Utd,Sunderland,0,0,0,...,1.0,3.0,1.0,3.0,1.0,50.0,1.0,3.0,2.0,0.0
3,11/1/25,Premier League,Matchweek 10,Away,D 2–2,Manchester Utd,Nott'ham Forest,0,0,0,...,2.0,3.0,2.0,3.0,2.0,50.0,1.0,4.0,1.0,0.0
4,11/8/25,Premier League,Matchweek 11,Away,D 2–2,Manchester Utd,Tottenham,0,0,0,...,,,,,,,,,,
