In [None]:
import os
from pathlib import Path

from loguru import logger
import pandas as pd
from soccerdata import FBref


# ===== Data directory layout ===== #
# The project root is resolved as two levels above the current working directory.
# NOTE(review): this depends on where the kernel is started - confirm.
PROJECT_ROOT = Path().absolute().parent.parent
DATA_DIR = PROJECT_ROOT / "datasets"
FBREF_DATA_DIR = DATA_DIR / "fbref_data"
# Passed to FBref as its data_dir (cache location).
FBREF_CACHE_DIR = PROJECT_ROOT / ".cache" / "fbref"

# Create every directory up front; already-existing directories are left alone.
DATA_DIR.mkdir(parents=True, exist_ok=True)
FBREF_DATA_DIR.mkdir(parents=True, exist_ok=True)
FBREF_CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Report the resolved locations.
logger.info(f"Project root: {PROJECT_ROOT}")
logger.info(f"Data directory: {DATA_DIR}")
logger.info(f"Common data directory: {FBREF_DATA_DIR}")
logger.info(f"FBRef cache directory: {FBREF_CACHE_DIR}")

In [None]:
# FBref reader for Serie A, seasons 21 through 25, using FBREF_CACHE_DIR as its data_dir.
fbref = FBref(
    leagues="ITA-Serie A",
    seasons=list(range(21, 26)),  # 21, 22, 23, 24, 25
    data_dir=FBREF_CACHE_DIR,
)

In [None]:
# Optional cell: remove cached files of the current season so the next read
# retrieves updated data.
current_season = "2526"

for filename in os.listdir(FBREF_CACHE_DIR):
    # Only entries whose name contains the season tag are removed.
    if current_season not in filename:
        continue
    file_path = os.path.join(FBREF_CACHE_DIR, filename)
    try:
        os.remove(file_path)
        logger.info(f"Deleted '{file_path}'.")
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining entries.
        logger.error(f"Error deleting '{file_path}': {e}")

## Player data [Outfield]

In [None]:
# Output folder for the outfield-player CSV exports.
players_outfield_dir = FBREF_DATA_DIR / "players_outfield"
players_outfield_dir.mkdir(parents=True, exist_ok=True)

### standard

In [None]:
# Player season stats ("standard" table) read through the FBref reader.
df_players_standard = fbref.read_player_season_stats("standard")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_standard.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_standard.columns.to_flat_index()
]

# Flattened FBref column name -> column name used in the exported CSV.
player_stats = {
    'nation': 'nationality',
    'pos': 'position',
    'born': 'birth_year',
    'Playing Time_MP': 'games',
    'Playing Time_Starts': 'games_starts',
    'Playing Time_Min': 'minutes',
    'Performance_Gls': 'goals',
    'Performance_Ast': 'assists',
    'Performance_PK': 'pens_made',
    'Performance_PKatt': 'pens_att',
    'Performance_CrdY': 'cards_yellow',
    'Performance_CrdR': 'cards_red',
    'Expected_xG': 'xg',
    'Expected_npxG': 'npxg',
    'Expected_xAG': 'xa',
    'Per 90 Minutes_Gls': 'goals_per90',
    'Per 90 Minutes_Ast': 'assists_per90',
    'Per 90 Minutes_G+A': 'goals_assists_per90',
    'Per 90 Minutes_G-PK': 'goals_pens_per90',
    'Per 90 Minutes_G+A-PK': 'goals_assists_pens_per90',
    'Per 90 Minutes_xG': 'xg_per90',
    'Per 90 Minutes_xAG': 'xa_per90',
    'Per 90 Minutes_xG+xAG': 'xg_xa_per90',
    'Per 90 Minutes_npxG': 'npxg_per90',
    'Per 90 Minutes_npxG+xAG': 'npxg_xa_per90',
}

# Rename, discard the columns that are not exported, then turn the index levels
# into regular columns. Chained on a new frame instead of mutating in place.
df_players_standard = (
    df_players_standard
    .rename(columns=player_stats)
    .drop(columns=[
        "Playing Time_90s",
        "Performance_G+A",
        "Performance_G-PK",
        "Expected_npxG+xAG",
        "Progression_PrgC",
        "Progression_PrgP",
    ])
    .reset_index()
)

# Fail before writing the CSV if the upstream table layout changed; the message
# lists the columns actually present to make the drift easy to spot.
assert len(df_players_standard.columns) == 31, (
    f"Unexpected column count {len(df_players_standard.columns)} (expected 31): "
    f"{list(df_players_standard.columns)}"
)

logger.info(f"Shape: {df_players_standard.shape}")

df_players_standard.to_csv(players_outfield_dir / "players_outfield_standard.csv", index=False)

### shooting

In [None]:
# Player season stats ("shooting" table) read through the FBref reader.
df_players_shooting = fbref.read_player_season_stats("shooting")

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_players_shooting.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_shooting.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
players_shooting = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Standard_Gls": "goals",
    "Standard_Sh": "shots_total",
    "Standard_SoT": "shots_on_target",
    "Standard_SoT%": "shots_on_target_pct",
    "Standard_Sh/90": "shots_total_per90",
    "Standard_SoT/90": "shots_on_target_per90",
    "Standard_G/Sh": "goals_per_shot",
    "Standard_G/SoT": "goals_per_shot_on_target",
    "Standard_PK": "pens_made",
    "Standard_PKatt": "pens_att",
    "Expected_xG": "xg",
    "Expected_npxG": "npxg",
    "Expected_npxG/Sh": "npxg_per_shot",
    "Expected_G-xG": "xg_net",
    "Expected_np:G-xG": "npxg_net",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_players_shooting = (
    df_players_shooting
    .rename(columns=players_shooting)
    .drop(columns=["Standard_Dist", "Standard_FK"])
    .reset_index()
)
assert len(df_players_shooting.columns) == 24

logger.info(f"Shape: {df_players_shooting.shape}")
df_players_shooting.to_csv(players_outfield_dir / "players_outfield_shooting.csv", index=False)

### passing

In [None]:
# Player season stats ("passing" table) read through the FBref reader.
df_players_passing = fbref.read_player_season_stats("passing")

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_players_passing.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_passing.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
players_passing = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Total_Cmp": "passes_completed",
    "Total_Att": "passes",
    "Total_Cmp%": "passes_pct",
    "Total_TotDist": "passes_total_distance",
    "Total_PrgDist": "passes_progressive_distance",
    "Short_Cmp": "passes_completed_short",
    "Short_Att": "passes_short",
    "Short_Cmp%": "passes_pct_short",
    "Medium_Cmp": "passes_completed_medium",
    "Medium_Att": "passes_medium",
    "Medium_Cmp%": "passes_pct_medium",
    "Long_Cmp": "passes_completed_long",
    "Long_Att": "passes_long",
    "Long_Cmp%": "passes_pct_long",
    "Ast": "assists",
    "xAG": "xa",
    "Expected_A-xAG": "xa_net",
    "KP": "assisted_shots",
    "1/3": "passes_into_final_third",
    "PPA": "passes_into_penalty_area",
    "CrsPA": "crosses_into_penalty_area",
    "PrgP": "progressive_passes",
}

# Rename -> drop the column that is not exported -> index levels become columns.
df_players_passing = (
    df_players_passing
    .rename(columns=players_passing)
    .drop(columns=["Expected_xA"])
    .reset_index()
)
assert len(df_players_passing.columns) == 31

logger.info(f"Shape: {df_players_passing.shape}")
df_players_passing.to_csv(players_outfield_dir / "players_outfield_passing.csv", index=False)

### passing_types

In [None]:
# Player season stats ("passing_types" table) read through the FBref reader.
df_players_passing_types = fbref.read_player_season_stats("passing_types")

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_players_passing_types.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_passing_types.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
players_passing_types = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Att": "passes",
    "Pass Types_Live": "passes_live",
    "Pass Types_Dead": "passes_dead",
    "Pass Types_FK": "passes_free_kicks",
    "Pass Types_TB": "through_balls",
    "Pass Types_Sw": "passes_switches",
    "Pass Types_Crs": "crosses",
    "Pass Types_TI": "throw_ins",
    "Pass Types_CK": "corner_kicks",
    "Corner Kicks_In": "corner_kicks_in",
    "Corner Kicks_Out": "corner_kicks_out",
    "Corner Kicks_Str": "corner_kicks_straight",
    "Outcomes_Cmp": "passes_completed",
    "Outcomes_Off": "passes_offsides",
    "Outcomes_Blocks": "passes_blocked",
}

# No columns are dropped for this table: rename, then index levels become columns.
df_players_passing_types = (
    df_players_passing_types
    .rename(columns=players_passing_types)
    .reset_index()
)
assert len(df_players_passing_types.columns) == 24

logger.info(f"Shape: {df_players_passing_types.shape}")
df_players_passing_types.to_csv(players_outfield_dir / "players_outfield_passing_types.csv", index=False)

### goal_shot_creation

In [None]:
# Player season stats ("goal_shot_creation" table) read through the FBref reader.
df_players_gca = fbref.read_player_season_stats("goal_shot_creation")

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_players_gca.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_gca.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
players_gca = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "SCA_SCA": "sca",
    "SCA_SCA90": "sca_per90",
    "SCA Types_PassLive": "sca_passes_live",
    "SCA Types_PassDead": "sca_passes_dead",
    "SCA Types_TO": "sca_dribbles",
    "SCA Types_Sh": "sca_shots",
    "SCA Types_Fld": "sca_fouled",
    "SCA Types_Def": "sca_defense",
    "GCA_GCA": "gca",
    "GCA_GCA90": "gca_per90",
    "GCA Types_PassLive": "gca_passes_live",
    "GCA Types_PassDead": "gca_passes_dead",
    "GCA Types_TO": "gca_dribbles",
    "GCA Types_Sh": "gca_shots",
    "GCA Types_Fld": "gca_fouled",
    "GCA Types_Def": "gca_defense",
}

# Rename, then index levels become regular columns.
df_players_gca = df_players_gca.rename(columns=players_gca).reset_index()
assert len(df_players_gca.columns) == 25

logger.info(f"Shape: {df_players_gca.shape}")
df_players_gca.to_csv(players_outfield_dir / "players_outfield_gca.csv", index=False)

### defense

In [None]:
# Player season stats ("defense" table) read through the FBref reader.
df_players_defense = fbref.read_player_season_stats("defense")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_defense.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_defense.columns.to_flat_index()
]

# Reference list of target column names; not used anywhere in this cell.
# NOTE(review): it contains names (e.g. the "pressure*" entries) that the mapping
# below never produces - presumably from an older schema; confirm before relying on it.
defense = ["player","nationality","position","squad","age","birth_year","minutes_90s","tackles",
           "tackles_won","tackles_def_3rd","tackles_mid_3rd","tackles_att_3rd","dribble_tackles",
           "dribbles_vs","dribble_tackles_pct","dribbled_past","pressures","pressure_regains",
           "pressure_regain_pct","pressures_def_3rd","pressures_mid_3rd","pressures_att_3rd",
           "blocks","blocked_shots","blocked_shots_saves","blocked_passes","interceptions",
           "clearances","errors"]

# Column listing left by the author as a reference (previously a bare, no-op list
# expression in the cell body):
#   'league', 'season', 'team', 'player', 'nation', 'pos', 'age',
#   'born', '90s', 'Tackles_Tkl', 'Tackles_TklW', 'Tackles_Def 3rd',
#   'Tackles_Mid 3rd', 'Tackles_Att 3rd', 'Challenges_Tkl',
#   'Challenges_Att', 'Challenges_Tkl%', 'Challenges_Lost',
#   'Blocks_Blocks', 'Blocks_Sh', 'Blocks_Pass', 'Int',
#   'Clr', 'Err'

# Flattened FBref column name -> column name used in the exported CSV.
players_defense = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Tackles_Tkl": "tackles",
    "Tackles_TklW": "tackles_won",
    "Tackles_Def 3rd": "tackles_def_3rd",
    "Tackles_Mid 3rd": "tackles_mid_3rd",
    "Tackles_Att 3rd": "tackles_att_3rd",
    "Challenges_Tkl": "dribble_tackles",
    "Challenges_Att": "dribbles_vs",
    "Challenges_Tkl%": "dribble_tackles_pct",
    "Challenges_Lost": "dribbled_past",
    "Blocks_Blocks": "blocks",
    "Blocks_Sh": "blocked_shots",
    "Blocks_Pass": "blocked_passes",
    "Int": "interceptions",
    "Clr": "clearances",
    "Err": "errors",
}

# Rename, discard the column that is not exported, then turn the index levels into
# regular columns. Chained on a new frame instead of mutating in place.
df_players_defense = (
    df_players_defense
    .rename(columns=players_defense)
    .drop(columns=["Tkl+Int"])
    .reset_index()
)

# Fail before writing the CSV if the upstream table layout changed.
assert len(df_players_defense.columns) == 24, (
    f"Unexpected column count {len(df_players_defense.columns)} (expected 24): "
    f"{list(df_players_defense.columns)}"
)

logger.info(f"Shape: {df_players_defense.shape}")
df_players_defense.to_csv(players_outfield_dir / "players_outfield_defense.csv", index=False)

### possession

In [None]:
# Player season stats ("possession" table) read through the FBref reader.
df_players_possession = fbref.read_player_season_stats("possession")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_possession.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_possession.columns.to_flat_index()
]

# Reference list of target column names; not used anywhere in this cell.
# NOTE(review): it contains names (e.g. "nutmegs", "pass_targets") that the mapping
# below never produces - presumably from an older schema; confirm before relying on it.
possession = ["player","nationality","position","squad","age","birth_year","minutes_90s","touches",
              "touches_def_pen_area","touches_def_3rd","touches_mid_3rd","touches_att_3rd",
              "touches_att_pen_area","touches_live_ball","dribbles_completed","dribbles",
              "dribbles_completed_pct","players_dribbled_past","nutmegs","carries","carry_distance",
              "carry_progressive_distance","progressive_carries","carries_into_final_third",
              "carries_into_penalty_area","pass_targets","passes_received","passes_received_pct",
              "miscontrols","dispossessed"]

# Column listing left by the author as a reference (previously a bare, no-op list
# expression in the cell body):
#   'league', 'season', 'team', 'player', 'nation', 'pos', 'age',
#   'born', '90s', 'Touches_Touches', 'Touches_Def Pen',
#   'Touches_Def 3rd', 'Touches_Mid 3rd', 'Touches_Att 3rd',
#   'Touches_Att Pen', 'Touches_Live', 'Take-Ons_Att', 'Take-Ons_Succ',
#   'Take-Ons_Succ%', 'Take-Ons_Tkld', 'Take-Ons_Tkld%',
#   'Carries_Carries', 'Carries_TotDist', 'Carries_PrgDist',
#   'Carries_PrgC', 'Carries_1/3', 'Carries_CPA', 'Carries_Mis',
#   'Carries_Dis', 'Receiving_Rec', 'Receiving_PrgR'

# Flattened FBref column name -> column name used in the exported CSV.
players_possession = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Touches_Touches": "touches",
    "Touches_Def Pen": "touches_def_pen_area",
    "Touches_Def 3rd": "touches_def_3rd",
    "Touches_Mid 3rd": "touches_mid_3rd",
    "Touches_Att 3rd": "touches_att_3rd",
    "Touches_Att Pen": "touches_att_pen_area",
    "Touches_Live": "touches_live_ball",
    "Take-Ons_Att": "dribbles",
    "Take-Ons_Succ": "dribbles_completed",
    "Take-Ons_Succ%": "dribbles_completed_pct",
    "Take-Ons_Tkld": "dribbles_tackled",
    "Take-Ons_Tkld%": "dribbles_tackled_pct",
    "Carries_Carries": "carries",
    "Carries_TotDist": "carry_distance",
    "Carries_PrgDist": "carry_progressive_distance",
    "Carries_PrgC": "progressive_carries",
    "Carries_1/3": "carries_into_final_third",
    "Carries_CPA": "carries_into_penalty_area",
    "Carries_Mis": "miscontrols",
    "Carries_Dis": "dispossessed",
    "Receiving_Rec": "passes_received",
    "Receiving_PrgR": "progressive_passes_received",
}

# Rename, then turn the index levels into regular columns (nothing is dropped here).
# Chained on a new frame instead of mutating in place.
df_players_possession = (
    df_players_possession
    .rename(columns=players_possession)
    .reset_index()
)

# Fail before writing the CSV if the upstream table layout changed.
assert len(df_players_possession.columns) == 31, (
    f"Unexpected column count {len(df_players_possession.columns)} (expected 31): "
    f"{list(df_players_possession.columns)}"
)

logger.info(f"Shape: {df_players_possession.shape}")
df_players_possession.to_csv(players_outfield_dir / "players_outfield_possession.csv", index=False)

### misc

In [None]:
# Player season stats ("misc" table) read through the FBref reader.
df_players_misc = fbref.read_player_season_stats("misc")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_misc.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_misc.columns.to_flat_index()
]

# Column listing left by the author as a reference (previously a bare, no-op list
# expression in the cell body):
#   'league', 'season', 'team', 'player', 'nation', 'pos', 'age',
#   'born', '90s', 'Performance_CrdY', 'Performance_CrdR',
#   'Performance_2CrdY', 'Performance_Fls', 'Performance_Fld',
#   'Performance_Off', 'Performance_Crs', 'Performance_Int',
#   'Performance_TklW', 'Performance_PKwon', 'Performance_PKcon',
#   'Performance_OG', 'Performance_Recov', 'Aerial Duels_Won',
#   'Aerial Duels_Lost', 'Aerial Duels_Won%'

# Flattened FBref column name -> column name used in the exported CSV.
players_misc = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Performance_CrdY": "cards_yellow",
    "Performance_CrdR": "cards_red",
    "Performance_2CrdY": "cards_yellow_red",
    "Performance_Fls": "fouls",
    "Performance_Fld": "fouled",
    "Performance_Off": "offsides",
    "Performance_Crs": "crosses",
    "Performance_Int": "interceptions",
    "Performance_TklW": "tackles_won",
    "Performance_PKwon": "pens_won",
    "Performance_PKcon": "pens_conceded",
    "Performance_OG": "own_goals",
    "Performance_Recov": "ball_recoveries",
    "Aerial Duels_Won": "aerials_won",
    "Aerial Duels_Lost": "aerials_lost",
    "Aerial Duels_Won%": "aerials_won_pct",
}

# Rename, then turn the index levels into regular columns (nothing is dropped here).
# Chained on a new frame instead of mutating in place.
df_players_misc = df_players_misc.rename(columns=players_misc).reset_index()

# Fail before writing the CSV if the upstream table layout changed.
assert len(df_players_misc.columns) == 25, (
    f"Unexpected column count {len(df_players_misc.columns)} (expected 25): "
    f"{list(df_players_misc.columns)}"
)

logger.info(f"Shape: {df_players_misc.shape}")
df_players_misc.to_csv(players_outfield_dir / "players_outfield_misc.csv", index=False)

## Player data [Keepers]

In [None]:
# Output folder for the goalkeeper CSV exports.
players_keepers_dir = FBREF_DATA_DIR / "players_keepers"
players_keepers_dir.mkdir(parents=True, exist_ok=True)

### keeper

In [None]:
# Player season stats ("keeper" table) read through the FBref reader.
df_players_keepers = fbref.read_player_season_stats("keeper")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_keepers.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_keepers.columns.to_flat_index()
]

# Reference list of target column names; not used anywhere in this cell.
keepers = ["player","nationality","position","squad","age","birth_year","games_gk","games_starts_gk",
           "minutes_gk","goals_against_gk","goals_against_per90_gk","shots_on_target_against","saves",
           "save_pct","wins_gk","draws_gk","losses_gk","clean_sheets","clean_sheets_pct",
           "pens_att_gk","pens_allowed","pens_saved","pens_missed_gk"]

# Column listing left by the author as a reference (previously a bare, no-op list
# expression in the cell body):
#   'league', 'season', 'team', 'player', 'nation', 'pos', 'age',
#   'born', 'Playing Time_MP', 'Playing Time_Starts',
#   'Playing Time_Min', 'Playing Time_90s', 'Performance_GA',
#   'Performance_GA90', 'Performance_SoTA', 'Performance_Saves',
#   'Performance_Save%', 'Performance_W', 'Performance_D',
#   'Performance_L', 'Performance_CS', 'Performance_CS%',
#   'Penalty Kicks_PKatt', 'Penalty Kicks_PKA', 'Penalty Kicks_PKsv',
#   'Penalty Kicks_PKm', 'Penalty Kicks_Save%'

# Flattened FBref column name -> column name used in the exported CSV.
# NOTE(review): the "90s" key has no match in the column listing above (the table
# shows 'Playing Time_90s', which is dropped below); rename ignores missing keys.
players_keepers = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    'Playing Time_MP': 'games_gk',
    'Playing Time_Starts': 'games_starts_gk',
    'Playing Time_Min': 'minutes_gk',
    'Performance_GA': 'goals_against_gk',
    'Performance_GA90': 'goals_against_per90_gk',
    'Performance_SoTA': 'shots_on_target_against',
    'Performance_Saves': 'saves',
    'Performance_Save%': 'save_pct',
    'Performance_W': 'wins_gk',
    'Performance_D': 'draws_gk',
    'Performance_L': 'losses_gk',
    'Performance_CS': 'clean_sheets',
    'Performance_CS%': 'clean_sheets_pct',
    'Penalty Kicks_PKatt': 'pens_att_gk',
    'Penalty Kicks_PKA': 'pens_allowed',
    'Penalty Kicks_PKsv': 'pens_saved',
    'Penalty Kicks_PKm': 'pens_missed_gk',
}

# Rename, discard the columns that are not exported, then turn the index levels
# into regular columns. Chained on a new frame instead of mutating in place.
df_players_keepers = (
    df_players_keepers
    .rename(columns=players_keepers)
    .drop(columns=[
        'Playing Time_90s',
        'Penalty Kicks_Save%',
    ])
    .reset_index()
)

# Fail before writing the CSV if the upstream table layout changed.
assert len(df_players_keepers.columns) == 25, (
    f"Unexpected column count {len(df_players_keepers.columns)} (expected 25): "
    f"{list(df_players_keepers.columns)}"
)

logger.info(f"Shape: {df_players_keepers.shape}")
df_players_keepers.to_csv(players_keepers_dir / "players_keepers.csv", index=False)

### keeper_adv

In [None]:
# Player season stats ("keeper_adv" table) read through the FBref reader.
df_players_keepersadv = fbref.read_player_season_stats("keeper_adv")

# Collapse the two-level column header into flat "<group>_<stat>" names; a column
# whose second level is empty keeps just the first-level name.
df_players_keepersadv.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_players_keepersadv.columns.to_flat_index()
]

# Column listing left by the author as a reference (previously a bare, no-op list
# expression in the cell body):
#   'league', 'season', 'team', 'player', 'nation', 'pos', 'age',
#   'born', '90s', 'Goals_GA', 'Goals_PKA', 'Goals_FK', 'Goals_CK',
#   'Goals_OG', 'Expected_PSxG', 'Expected_PSxG/SoT',
#   'Expected_PSxG+/-', 'Expected_/90', 'Launched_Cmp', 'Launched_Att',
#   'Launched_Cmp%', 'Passes_Att (GK)', 'Passes_Thr', 'Passes_Launch%',
#   'Passes_AvgLen', 'Goal Kicks_Att', 'Goal Kicks_Launch%',
#   'Goal Kicks_AvgLen', 'Crosses_Opp', 'Crosses_Stp', 'Crosses_Stp%',
#   'Sweeper_#OPA', 'Sweeper_#OPA/90', 'Sweeper_AvgDist'

# Flattened FBref column name -> column name used in the exported CSV.
players_keepersadv = {
    "nation": "nationality",
    "pos": "position",
    "born": "birth_year",
    "90s": "minutes_90s",
    "Goals_GA": "goals_against_gk",
    "Goals_PKA": "pens_allowed",
    "Goals_FK": "free_kick_goals_against_gk",
    "Goals_CK": "corner_kick_goals_against_gk",
    "Goals_OG": "own_goals_against_gk",
    "Expected_PSxG": "psxg_gk",
    "Expected_PSxG/SoT": "psnpxg_per_shot_on_target_against",
    "Expected_PSxG+/-": "psxg_net_gk",
    "Expected_/90": "psxg_net_per90_gk",
    "Launched_Cmp": "passes_completed_launched_gk",
    "Launched_Att": "passes_launched_gk",
    "Launched_Cmp%": "passes_pct_launched_gk",
    "Passes_Att (GK)": "passes_gk",
    "Passes_Thr": "passes_throws_gk",
    "Passes_Launch%": "pct_passes_launched_gk",
    "Passes_AvgLen": "passes_length_avg_gk",
    "Goal Kicks_Att": "goal_kicks",
    "Goal Kicks_Launch%": "pct_goal_kicks_launched",
    "Goal Kicks_AvgLen": "goal_kick_length_avg",
    "Crosses_Opp": "crosses_gk",
    "Crosses_Stp": "crosses_stopped_gk",
    "Crosses_Stp%": "crosses_stopped_pct_gk",
    "Sweeper_#OPA": "def_actions_outside_pen_area_gk",
    "Sweeper_#OPA/90": "def_actions_outside_pen_area_per90_gk",
    "Sweeper_AvgDist": "avg_distance_def_actions_gk",
}

# Rename, then turn the index levels into regular columns (nothing is dropped here).
# Chained on a new frame instead of mutating in place.
df_players_keepersadv = (
    df_players_keepersadv
    .rename(columns=players_keepersadv)
    .reset_index()
)

# Fail before writing the CSV if the upstream table layout changed.
assert len(df_players_keepersadv.columns) == 34, (
    f"Unexpected column count {len(df_players_keepersadv.columns)} (expected 34): "
    f"{list(df_players_keepersadv.columns)}"
)

logger.info(f"Shape: {df_players_keepersadv.shape}")
df_players_keepersadv.to_csv(players_keepers_dir / "players_keepersadv.csv", index=False)

## Team data ['for']


In [None]:
# Output folder for the team ("for") CSV exports.
teams_for_dir = FBREF_DATA_DIR / "teams_for"
teams_for_dir.mkdir(parents=True, exist_ok=True)

### standard

In [None]:
# Team season stats ("standard" table, opponent_stats=False) read through the FBref reader.
df_teams_for_standard = fbref.read_team_season_stats("standard", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_standard.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_standard.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
team_stats = {
    "Poss": "possession",
    "Playing Time_MP": "games",
    "Playing Time_Starts": "games_starts",
    "Playing Time_Min": "minutes",
    "Performance_Gls": "goals",
    "Performance_Ast": "assists",
    "Performance_PK": "pens_made",
    "Performance_PKatt": "pens_att",
    "Performance_CrdY": "cards_yellow",
    "Performance_CrdR": "cards_red",
    "Expected_xG": "xg",
    "Expected_npxG": "npxg",
    "Expected_xAG": "xa",
    "Per 90 Minutes_Gls": "goals_per90",
    "Per 90 Minutes_Ast": "assists_per90",
    "Per 90 Minutes_G+A": "goals_assists_per90",
    "Per 90 Minutes_G-PK": "goals_pens_per90",
    "Per 90 Minutes_G+A-PK": "goals_assists_pens_per90",
    "Per 90 Minutes_xG": "xg_per90",
    "Per 90 Minutes_xAG": "xa_per90",
    "Per 90 Minutes_xG+xAG": "xg_xa_per90",
    "Per 90 Minutes_npxG": "npxg_per90",
    "Per 90 Minutes_npxG+xAG": "npxg_xa_per90",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_standard = (
    df_teams_for_standard
    .rename(columns=team_stats)
    .drop(columns=[
        "Age",
        "Playing Time_90s",
        "Performance_G+A",
        "Performance_G-PK",
        "Expected_npxG+xAG",
        "Progression_PrgC",
        "Progression_PrgP",
        "url",
    ])
    .reset_index()
)
assert len(df_teams_for_standard.columns) == 27

logger.info(f"Shape: {df_teams_for_standard.shape}")

df_teams_for_standard.to_csv(teams_for_dir / "teams_for_standard.csv", index=False)

### keeper

In [None]:
# Team season stats ("keeper" table, opponent_stats=False) read through the FBref reader.
df_teams_for_keepers = fbref.read_team_season_stats("keeper", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_keepers.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_keepers.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
teams_keepers = {
    "Playing Time_MP": "games_gk",
    "Playing Time_Starts": "games_starts_gk",
    "Playing Time_Min": "minutes_gk",
    "Performance_GA": "goals_against_gk",
    "Performance_GA90": "goals_against_per90_gk",
    "Performance_SoTA": "shots_on_target_against",
    "Performance_Saves": "saves",
    "Performance_Save%": "save_pct",
    "Performance_W": "wins_gk",
    "Performance_D": "draws_gk",
    "Performance_L": "losses_gk",
    "Performance_CS": "clean_sheets",
    "Performance_CS%": "clean_sheets_pct",
    "Penalty Kicks_PKatt": "pens_att_gk",
    "Penalty Kicks_PKA": "pens_allowed",
    "Penalty Kicks_PKsv": "pens_saved",
    "Penalty Kicks_PKm": "pens_missed_gk",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_keepers = (
    df_teams_for_keepers
    .rename(columns=teams_keepers)
    .drop(columns=["Playing Time_90s", "Penalty Kicks_Save%", "url"])
    .reset_index()
)
assert len(df_teams_for_keepers.columns) == 21

logger.info(f"Shape: {df_teams_for_keepers.shape}")
df_teams_for_keepers.to_csv(teams_for_dir / "teams_for_keepers.csv", index=False)

### keeper_adv

In [None]:
# Team season stats ("keeper_adv" table, opponent_stats=False) read through the FBref reader.
df_teams_for_keepersadv = fbref.read_team_season_stats("keeper_adv", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_keepersadv.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_keepersadv.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
teams_keepersadv = {
    "90s": "minutes_90s",
    "Goals_GA": "goals_against_gk",
    "Goals_PKA": "pens_allowed",
    "Goals_FK": "free_kick_goals_against_gk",
    "Goals_CK": "corner_kick_goals_against_gk",
    "Goals_OG": "own_goals_against_gk",
    "Expected_PSxG": "psxg_gk",
    "Expected_PSxG/SoT": "psnpxg_per_shot_on_target_against",
    "Expected_PSxG+/-": "psxg_net_gk",
    "Expected_/90": "psxg_net_per90_gk",
    "Launched_Cmp": "passes_completed_launched_gk",
    "Launched_Att": "passes_launched_gk",
    "Launched_Cmp%": "passes_pct_launched_gk",
    "Passes_Att (GK)": "passes_gk",
    "Passes_Thr": "passes_throws_gk",
    "Passes_Launch%": "pct_passes_launched_gk",
    "Passes_AvgLen": "passes_length_avg_gk",
    "Goal Kicks_Att": "goal_kicks",
    "Goal Kicks_Launch%": "pct_goal_kicks_launched",
    "Goal Kicks_AvgLen": "goal_kick_length_avg",
    "Crosses_Opp": "crosses_gk",
    "Crosses_Stp": "crosses_stopped_gk",
    "Crosses_Stp%": "crosses_stopped_pct_gk",
    "Sweeper_#OPA": "def_actions_outside_pen_area_gk",
    "Sweeper_#OPA/90": "def_actions_outside_pen_area_per90_gk",
    "Sweeper_AvgDist": "avg_distance_def_actions_gk",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_keepersadv = (
    df_teams_for_keepersadv
    .rename(columns=teams_keepersadv)
    .drop(columns=["players_used", "url"])
    .reset_index()
)
assert len(df_teams_for_keepersadv.columns) == 29

logger.info(f"Shape: {df_teams_for_keepersadv.shape}")
df_teams_for_keepersadv.to_csv(teams_for_dir / "teams_for_keepersadv.csv", index=False)

### shooting

In [None]:
# Team season stats ("shooting" table, opponent_stats=False) read through the FBref reader.
df_teams_for_shooting = fbref.read_team_season_stats("shooting", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_shooting.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_shooting.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
teams_shooting = {
    "Standard_Gls": "goals",
    "Standard_Sh": "shots_total",
    "Standard_SoT": "shots_on_target",
    "Standard_SoT%": "shots_on_target_pct",
    "Standard_Sh/90": "shots_total_per90",
    "Standard_SoT/90": "shots_on_target_per90",
    "Standard_G/Sh": "goals_per_shot",
    "Standard_G/SoT": "goals_per_shot_on_target",
    "Standard_PK": "pens_made",
    "Standard_PKatt": "pens_att",
    "Expected_xG": "xg",
    "Expected_npxG": "npxg",
    "Expected_npxG/Sh": "npxg_per_shot",
    "Expected_G-xG": "xg_net",
    "Expected_np:G-xG": "npxg_net",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_shooting = (
    df_teams_for_shooting
    .rename(columns=teams_shooting)
    .drop(columns=["players_used", "Standard_Dist", "Standard_FK", "url"])
    .reset_index()
)
assert len(df_teams_for_shooting.columns) == 19

logger.info(f"Shape: {df_teams_for_shooting.shape}")
df_teams_for_shooting.to_csv(teams_for_dir / "teams_for_shooting.csv", index=False)

### passing

In [None]:
# Team season stats ("passing" table, opponent_stats=False) read through the FBref reader.
df_teams_for_passing = fbref.read_team_season_stats("passing", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_passing.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_passing.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
teams_passing = {
    "Total_Cmp": "passes_completed",
    "Total_Att": "passes",
    "Total_Cmp%": "passes_pct",
    "Total_TotDist": "passes_total_distance",
    "Total_PrgDist": "passes_progressive_distance",
    "Short_Cmp": "passes_completed_short",
    "Short_Att": "passes_short",
    "Short_Cmp%": "passes_pct_short",
    "Medium_Cmp": "passes_completed_medium",
    "Medium_Att": "passes_medium",
    "Medium_Cmp%": "passes_pct_medium",
    "Long_Cmp": "passes_completed_long",
    "Long_Att": "passes_long",
    "Long_Cmp%": "passes_pct_long",
    "Ast": "assists",
    "xAG": "xa",
    "Expected_A-xAG": "xa_net",
    "KP": "assisted_shots",
    "1/3": "passes_into_final_third",
    "PPA": "passes_into_penalty_area",
    "CrsPA": "crosses_into_penalty_area",
    "PrgP": "progressive_passes",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_passing = (
    df_teams_for_passing
    .rename(columns=teams_passing)
    .drop(columns=["players_used", "90s", "Expected_xA", "url"])
    .reset_index()
)
assert len(df_teams_for_passing.columns) == 25

logger.info(f"Shape: {df_teams_for_passing.shape}")
df_teams_for_passing.to_csv(teams_for_dir / "teams_for_passing.csv", index=False)

### passing_types

In [None]:
# Team season stats ("passing_types" table, opponent_stats=False) read through the FBref reader.
df_teams_for_passing_types = fbref.read_team_season_stats("passing_types", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_passing_types.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_passing_types.columns.to_flat_index()
]

# Reference list of target column names; not referenced elsewhere in this cell.
passing_types2 = [
    "passes", "passes_live", "passes_dead", "passes_free_kicks", "through_balls", "passes_switches",
    "crosses", "corner_kicks", "corner_kicks_in", "corner_kicks_out", "corner_kicks_straight",
    "throw_ins", "passes_completed", "passes_offsides", "passes_blocked",
]

# Flattened FBref column name -> exported column name.
teams_passing_types = {
    "Att": "passes",
    "Pass Types_Live": "passes_live",
    "Pass Types_Dead": "passes_dead",
    "Pass Types_FK": "passes_free_kicks",
    "Pass Types_TB": "through_balls",
    "Pass Types_Sw": "passes_switches",
    "Pass Types_Crs": "crosses",
    "Pass Types_TI": "throw_ins",
    "Pass Types_CK": "corner_kicks",
    "Corner Kicks_In": "corner_kicks_in",
    "Corner Kicks_Out": "corner_kicks_out",
    "Corner Kicks_Str": "corner_kicks_straight",
    "Outcomes_Cmp": "passes_completed",
    "Outcomes_Off": "passes_offsides",
    "Outcomes_Blocks": "passes_blocked",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_passing_types = (
    df_teams_for_passing_types
    .rename(columns=teams_passing_types)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)
assert len(df_teams_for_passing_types.columns) == 18

logger.info(f"Shape: {df_teams_for_passing_types.shape}")
df_teams_for_passing_types.to_csv(teams_for_dir / "teams_for_passing_types.csv", index=False)

### goal_shot_creation

In [None]:
# Team season stats ("goal_shot_creation" table, opponent_stats=False) read through the FBref reader.
df_teams_for_gca = fbref.read_team_season_stats("goal_shot_creation", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_gca.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_gca.columns.to_flat_index()
]

# Flattened FBref column name -> exported column name.
teams_gca = {
    "SCA_SCA": "sca",
    "SCA_SCA90": "sca_per90",
    "SCA Types_PassLive": "sca_passes_live",
    "SCA Types_PassDead": "sca_passes_dead",
    "SCA Types_TO": "sca_dribbles",
    "SCA Types_Sh": "sca_shots",
    "SCA Types_Fld": "sca_fouled",
    "SCA Types_Def": "sca_defense",
    "GCA_GCA": "gca",
    "GCA_GCA90": "gca_per90",
    "GCA Types_PassLive": "gca_passes_live",
    "GCA Types_PassDead": "gca_passes_dead",
    "GCA Types_TO": "gca_dribbles",
    "GCA Types_Sh": "gca_shots",
    "GCA Types_Fld": "gca_fouled",
    "GCA Types_Def": "gca_defense",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_gca = (
    df_teams_for_gca
    .rename(columns=teams_gca)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)
assert len(df_teams_for_gca.columns) == 19

logger.info(f"Shape: {df_teams_for_gca.shape}")
df_teams_for_gca.to_csv(teams_for_dir / "teams_for_gca.csv", index=False)

### defense

In [None]:
# Team season stats ("defense" table, opponent_stats=False) read through the FBref reader.
df_teams_for_defense = fbref.read_team_season_stats("defense", opponent_stats=False)

# Flatten the (group, stat) column tuples to "group_stat"; an empty stat leaves just the group.
df_teams_for_defense.columns = [
    f"{group}_{stat}" if stat else f"{group}"
    for group, stat in df_teams_for_defense.columns.to_flat_index()
]

# Reference list of target column names; not referenced elsewhere in this cell.
defense2 = [
    "tackles", "tackles_won", "tackles_def_3rd", "tackles_mid_3rd", "tackles_att_3rd",
    "dribble_tackles", "dribbles_vs", "dribble_tackles_pct", "dribbled_past", "pressures",
    "pressure_regains", "pressure_regain_pct", "pressures_def_3rd", "pressures_mid_3rd",
    "pressures_att_3rd", "blocks", "blocked_shots", "blocked_shots_saves", "blocked_passes",
    "interceptions", "clearances", "errors",
]

# Flattened FBref column name -> exported column name.
teams_defense = {
    "Tackles_Tkl": "tackles",
    "Tackles_TklW": "tackles_won",
    "Tackles_Def 3rd": "tackles_def_3rd",
    "Tackles_Mid 3rd": "tackles_mid_3rd",
    "Tackles_Att 3rd": "tackles_att_3rd",
    "Challenges_Tkl": "dribble_tackles",
    "Challenges_Att": "dribbles_vs",
    "Challenges_Tkl%": "dribble_tackles_pct",
    "Challenges_Lost": "dribbled_past",
    "Blocks_Blocks": "blocks",
    "Blocks_Sh": "blocked_shots",
    "Blocks_Pass": "blocked_passes",
    "Int": "interceptions",
    "Clr": "clearances",
    "Err": "errors",
}

# Rename -> drop the columns that are not exported -> index levels become columns.
df_teams_for_defense = (
    df_teams_for_defense
    .rename(columns=teams_defense)
    .drop(columns=["players_used", "90s", "Tkl+Int", "url"])
    .reset_index()
)
assert len(df_teams_for_defense.columns) == 18

logger.info(f"Shape: {df_teams_for_defense.shape}")
df_teams_for_defense.to_csv(teams_for_dir / "teams_for_defense.csv", index=False)

### possession

In [None]:
# FBref flattened column name -> project column name for the team "possession" table.
teams_possession = {
    "Touches_Touches": "touches",
    "Touches_Def Pen": "touches_def_pen_area",
    "Touches_Def 3rd": "touches_def_3rd",
    "Touches_Mid 3rd": "touches_mid_3rd",
    "Touches_Att 3rd": "touches_att_3rd",
    "Touches_Att Pen": "touches_att_pen_area",
    "Touches_Live": "touches_live_ball",
    "Take-Ons_Att": "dribbles",
    "Take-Ons_Succ": "dribbles_completed",
    "Take-Ons_Succ%": "dribbles_completed_pct",
    "Take-Ons_Tkld": "dribbles_tackled",
    "Take-Ons_Tkld%": "dribbles_tackled_pct",
    "Carries_Carries": "carries",
    "Carries_TotDist": "carry_distance",
    "Carries_PrgDist": "carry_progressive_distance",
    "Carries_PrgC": "progressive_carries",
    "Carries_1/3": "carries_into_final_third",
    "Carries_CPA": "carries_into_penalty_area",
    "Carries_Mis": "miscontrols",
    "Carries_Dis": "dispossessed",
    "Receiving_Rec": "passes_received",
    "Receiving_PrgR": "progressive_passes_received",
}

# Column-name list that is not referenced anywhere else in this cell.
# NOTE(review): kept only in case another cell reads it -- confirm and delete if unused.
possession2 = [
    "touches", "touches_def_pen_area", "touches_def_3rd", "touches_mid_3rd", "touches_att_3rd",
    "touches_att_pen_area", "touches_live_ball", "dribbles_completed", "dribbles", "dribbles_completed_pct",
    "players_dribbled_past", "nutmegs", "carries", "carry_distance", "carry_progressive_distance",
    "progressive_carries", "carries_into_final_third", "carries_into_penalty_area", "pass_targets",
    "passes_received", "passes_received_pct", "miscontrols", "dispossessed",
]

# Team-level possession stats for the "for" view (opponent_stats=False).
df_teams_for_possession = fbref.read_team_season_stats("possession", opponent_stats=False)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_for_possession.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_for_possession.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_for_possession = (
    df_teams_for_possession
    .rename(columns=teams_possession)
    .drop(columns=["players_used", "Poss", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_for_possession.columns) == 25

logger.info(f"Shape: {df_teams_for_possession.shape}")
df_teams_for_possession.to_csv(teams_for_dir / "teams_for_possession.csv", index=False)

### misc

In [None]:
# FBref flattened column name -> project column name for the team "misc" table.
teams_misc = {
    "Performance_CrdY": "cards_yellow",
    "Performance_CrdR": "cards_red",
    "Performance_2CrdY": "cards_yellow_red",
    "Performance_Fls": "fouls",
    "Performance_Fld": "fouled",
    "Performance_Off": "offsides",
    "Performance_Crs": "crosses",
    "Performance_Int": "interceptions",
    "Performance_TklW": "tackles_won",
    "Performance_PKwon": "pens_won",
    "Performance_PKcon": "pens_conceded",
    "Performance_OG": "own_goals",
    "Performance_Recov": "ball_recoveries",
    "Aerial Duels_Won": "aerials_won",
    "Aerial Duels_Lost": "aerials_lost",
    "Aerial Duels_Won%": "aerials_won_pct",
}

# Team-level miscellaneous stats for the "for" view (opponent_stats=False).
df_teams_for_misc = fbref.read_team_season_stats("misc", opponent_stats=False)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_for_misc.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_for_misc.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_for_misc = (
    df_teams_for_misc
    .rename(columns=teams_misc)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_for_misc.columns) == 19

logger.info(f"Shape: {df_teams_for_misc.shape}")
df_teams_for_misc.to_csv(teams_for_dir / "teams_for_misc.csv", index=False)

## Team data ['vs']


In [None]:
# Output folder for the "vs" team tables; created (with any missing parents) if absent.
teams_vs_dir = FBREF_DATA_DIR / "teams_vs"
teams_vs_dir.mkdir(parents=True, exist_ok=True)

### standard

In [None]:
# FBref flattened column name -> project column name for the team "standard" table.
team_stats = {
    'Poss': 'possession',
    'Playing Time_MP': 'games',
    'Playing Time_Starts': 'games_starts',
    'Playing Time_Min': 'minutes',
    'Performance_Gls': 'goals',
    'Performance_Ast': 'assists',
    'Performance_PK': 'pens_made',
    'Performance_PKatt': 'pens_att',
    'Performance_CrdY': 'cards_yellow',
    'Performance_CrdR': 'cards_red',
    'Expected_xG': 'xg',
    'Expected_npxG': 'npxg',
    'Expected_xAG': 'xa',
    'Per 90 Minutes_Gls': 'goals_per90',
    'Per 90 Minutes_Ast': 'assists_per90',
    'Per 90 Minutes_G+A': 'goals_assists_per90',
    'Per 90 Minutes_G-PK': 'goals_pens_per90',
    'Per 90 Minutes_G+A-PK': 'goals_assists_pens_per90',
    'Per 90 Minutes_xG': 'xg_per90',
    'Per 90 Minutes_xAG': 'xa_per90',
    'Per 90 Minutes_xG+xAG': 'xg_xa_per90',
    'Per 90 Minutes_npxG': 'npxg_per90',
    'Per 90 Minutes_npxG+xAG': 'npxg_xa_per90',
}

# Team-level standard stats for the "vs" view (opponent_stats=True).
df_teams_vs_standard = fbref.read_team_season_stats("standard", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_standard.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_standard.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_standard = (
    df_teams_vs_standard
    .rename(columns=team_stats)
    .drop(columns=[
        "Age",
        "Playing Time_90s",
        "Performance_G+A",
        "Performance_G-PK",
        "Expected_npxG+xAG",
        "Progression_PrgC",
        "Progression_PrgP",
        "url",
    ])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_standard.columns) == 27

logger.info(f"Shape: {df_teams_vs_standard.shape}")

df_teams_vs_standard.to_csv(teams_vs_dir / "teams_vs_standard.csv", index=False)

### keeper

In [None]:
# FBref flattened column name -> project column name for the team "keeper" table.
teams_keepers = {
    'Playing Time_MP': 'games_gk',
    'Playing Time_Starts': 'games_starts_gk',
    'Playing Time_Min': 'minutes_gk',
    'Performance_GA': 'goals_against_gk',
    'Performance_GA90': 'goals_against_per90_gk',
    'Performance_SoTA': 'shots_on_target_against',
    'Performance_Saves': 'saves',
    'Performance_Save%': 'save_pct',
    'Performance_W': 'wins_gk',
    'Performance_D': 'draws_gk',
    'Performance_L': 'losses_gk',
    'Performance_CS': 'clean_sheets',
    'Performance_CS%': 'clean_sheets_pct',
    'Penalty Kicks_PKatt': 'pens_att_gk',
    'Penalty Kicks_PKA': 'pens_allowed',
    'Penalty Kicks_PKsv': 'pens_saved',
    'Penalty Kicks_PKm': 'pens_missed_gk',
}

# Team-level goalkeeping stats for the "vs" view (opponent_stats=True).
df_teams_vs_keepers = fbref.read_team_season_stats("keeper", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_keepers.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_keepers.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_keepers = (
    df_teams_vs_keepers
    .rename(columns=teams_keepers)
    .drop(columns=["Playing Time_90s", "Penalty Kicks_Save%", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_keepers.columns) == 21

logger.info(f"Shape: {df_teams_vs_keepers.shape}")
df_teams_vs_keepers.to_csv(teams_vs_dir / "teams_vs_keepers.csv", index=False)

### keeper_adv

In [None]:
# Team-level advanced goalkeeping stats for the "vs" view (opponent_stats=True).
df_teams_vs_keepersadv = fbref.read_team_season_stats("keeper_adv", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_keepersadv.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_keepersadv.columns.to_flat_index()
]

# FBref flattened column name -> project column name for the "keeper_adv" table.
# Each key appears exactly once: a repeated key in a dict literal is silently
# overwritten by the last occurrence, which hides copy-paste mistakes.
teams_keepersadv = {
    "90s": "minutes_90s",
    "Goals_GA": "goals_against_gk",
    "Goals_PKA": "pens_allowed",
    "Goals_FK": "free_kick_goals_against_gk",
    "Goals_CK": "corner_kick_goals_against_gk",
    "Goals_OG": "own_goals_against_gk",
    "Expected_PSxG": "psxg_gk",
    "Expected_PSxG/SoT": "psnpxg_per_shot_on_target_against",
    "Expected_PSxG+/-": "psxg_net_gk",
    "Expected_/90": "psxg_net_per90_gk",
    "Launched_Cmp": "passes_completed_launched_gk",
    "Launched_Att": "passes_launched_gk",
    "Launched_Cmp%": "passes_pct_launched_gk",
    "Passes_Att (GK)": "passes_gk",
    "Passes_Thr": "passes_throws_gk",
    "Passes_Launch%": "pct_passes_launched_gk",
    "Passes_AvgLen": "passes_length_avg_gk",
    "Goal Kicks_Att": "goal_kicks",
    "Goal Kicks_Launch%": "pct_goal_kicks_launched",
    "Goal Kicks_AvgLen": "goal_kick_length_avg",
    "Crosses_Opp": "crosses_gk",
    "Crosses_Stp": "crosses_stopped_gk",
    "Crosses_Stp%": "crosses_stopped_pct_gk",
    "Sweeper_#OPA": "def_actions_outside_pen_area_gk",
    "Sweeper_#OPA/90": "def_actions_outside_pen_area_per90_gk",
    "Sweeper_AvgDist": "avg_distance_def_actions_gk",
}

# Rename to project names, discard the columns we do not export, and turn the
# index into columns. Non-inplace calls keep the cell re-runnable as one expression.
df_teams_vs_keepersadv = (
    df_teams_vs_keepersadv
    .rename(columns=teams_keepersadv)
    .drop(columns=["players_used", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_keepersadv.columns) == 29

logger.info(f"Shape: {df_teams_vs_keepersadv.shape}")
df_teams_vs_keepersadv.to_csv(teams_vs_dir / "teams_vs_keepersadv.csv", index=False)

### shooting

In [None]:
# FBref flattened column name -> project column name for the team "shooting" table.
teams_shooting = {
    "Standard_Gls": "goals",
    "Standard_Sh": "shots_total",
    "Standard_SoT": "shots_on_target",
    "Standard_SoT%": "shots_on_target_pct",
    "Standard_Sh/90": "shots_total_per90",
    "Standard_SoT/90": "shots_on_target_per90",
    "Standard_G/Sh": "goals_per_shot",
    "Standard_G/SoT": "goals_per_shot_on_target",
    "Standard_PK": "pens_made",
    "Standard_PKatt": "pens_att",
    "Expected_xG": "xg",
    "Expected_npxG": "npxg",
    "Expected_npxG/Sh": "npxg_per_shot",
    "Expected_G-xG": "xg_net",
    "Expected_np:G-xG": "npxg_net",
}

# Team-level shooting stats for the "vs" view (opponent_stats=True).
df_teams_vs_shooting = fbref.read_team_season_stats("shooting", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_shooting.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_shooting.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_shooting = (
    df_teams_vs_shooting
    .rename(columns=teams_shooting)
    .drop(columns=["players_used", "Standard_Dist", "Standard_FK", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_shooting.columns) == 19

logger.info(f"Shape: {df_teams_vs_shooting.shape}")
df_teams_vs_shooting.to_csv(teams_vs_dir / "teams_vs_shooting.csv", index=False)

### passing

In [None]:
# FBref flattened column name -> project column name for the team "passing" table.
teams_passing = {
    "Total_Cmp": "passes_completed",
    "Total_Att": "passes",
    "Total_Cmp%": "passes_pct",
    "Total_TotDist": "passes_total_distance",
    "Total_PrgDist": "passes_progressive_distance",
    "Short_Cmp": "passes_completed_short",
    "Short_Att": "passes_short",
    "Short_Cmp%": "passes_pct_short",
    "Medium_Cmp": "passes_completed_medium",
    "Medium_Att": "passes_medium",
    "Medium_Cmp%": "passes_pct_medium",
    "Long_Cmp": "passes_completed_long",
    "Long_Att": "passes_long",
    "Long_Cmp%": "passes_pct_long",
    "Ast": "assists",
    "xAG": "xa",
    "Expected_A-xAG": "xa_net",
    "KP": "assisted_shots",
    "1/3": "passes_into_final_third",
    "PPA": "passes_into_penalty_area",
    "CrsPA": "crosses_into_penalty_area",
    "PrgP": "progressive_passes",
}

# Team-level passing stats for the "vs" view (opponent_stats=True).
df_teams_vs_passing = fbref.read_team_season_stats("passing", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_passing.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_passing.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_passing = (
    df_teams_vs_passing
    .rename(columns=teams_passing)
    .drop(columns=["players_used", "90s", "Expected_xA", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_passing.columns) == 25

logger.info(f"Shape: {df_teams_vs_passing.shape}")
df_teams_vs_passing.to_csv(teams_vs_dir / "teams_vs_passing.csv", index=False)

### passing_types

In [None]:
# FBref flattened column name -> project column name for the team "passing_types" table.
teams_passing_types = {
    "Att": "passes",
    "Pass Types_Live": "passes_live",
    "Pass Types_Dead": "passes_dead",
    "Pass Types_FK": "passes_free_kicks",
    "Pass Types_TB": "through_balls",
    "Pass Types_Sw": "passes_switches",
    "Pass Types_Crs": "crosses",
    "Pass Types_TI": "throw_ins",
    "Pass Types_CK": "corner_kicks",
    "Corner Kicks_In": "corner_kicks_in",
    "Corner Kicks_Out": "corner_kicks_out",
    "Corner Kicks_Str": "corner_kicks_straight",
    "Outcomes_Cmp": "passes_completed",
    "Outcomes_Off": "passes_offsides",
    "Outcomes_Blocks": "passes_blocked",
}

# Column-name list that is not referenced anywhere else in this cell.
# NOTE(review): kept only in case another cell reads it -- confirm and delete if unused.
passing_types2 = [
    "passes", "passes_live", "passes_dead", "passes_free_kicks", "through_balls", "passes_switches",
    "crosses", "corner_kicks", "corner_kicks_in", "corner_kicks_out", "corner_kicks_straight",
    "throw_ins", "passes_completed", "passes_offsides", "passes_blocked",
]

# Team-level pass-type stats for the "vs" view (opponent_stats=True).
df_teams_vs_passing_types = fbref.read_team_season_stats("passing_types", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_passing_types.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_passing_types.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_passing_types = (
    df_teams_vs_passing_types
    .rename(columns=teams_passing_types)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_passing_types.columns) == 18

logger.info(f"Shape: {df_teams_vs_passing_types.shape}")
df_teams_vs_passing_types.to_csv(teams_vs_dir / "teams_vs_passing_types.csv", index=False)

### goal_shot_creation

In [None]:
# FBref flattened column name -> project column name for the team "goal_shot_creation" table.
teams_gca = {
    "SCA_SCA": "sca",
    "SCA_SCA90": "sca_per90",
    "SCA Types_PassLive": "sca_passes_live",
    "SCA Types_PassDead": "sca_passes_dead",
    "SCA Types_TO": "sca_dribbles",
    "SCA Types_Sh": "sca_shots",
    "SCA Types_Fld": "sca_fouled",
    "SCA Types_Def": "sca_defense",
    "GCA_GCA": "gca",
    "GCA_GCA90": "gca_per90",
    "GCA Types_PassLive": "gca_passes_live",
    "GCA Types_PassDead": "gca_passes_dead",
    "GCA Types_TO": "gca_dribbles",
    "GCA Types_Sh": "gca_shots",
    "GCA Types_Fld": "gca_fouled",
    "GCA Types_Def": "gca_defense",
}

# Team-level shot/goal-creation stats for the "vs" view (opponent_stats=True).
df_teams_vs_gca = fbref.read_team_season_stats("goal_shot_creation", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_gca.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_gca.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_gca = (
    df_teams_vs_gca
    .rename(columns=teams_gca)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_gca.columns) == 19

logger.info(f"Shape: {df_teams_vs_gca.shape}")
df_teams_vs_gca.to_csv(teams_vs_dir / "teams_vs_gca.csv", index=False)

### defense

In [None]:
# FBref flattened column name -> project column name for the team "defense" table.
teams_defense = {
    "Tackles_Tkl": "tackles",
    "Tackles_TklW": "tackles_won",
    "Tackles_Def 3rd": "tackles_def_3rd",
    "Tackles_Mid 3rd": "tackles_mid_3rd",
    "Tackles_Att 3rd": "tackles_att_3rd",
    "Challenges_Tkl": "dribble_tackles",
    "Challenges_Att": "dribbles_vs",
    "Challenges_Tkl%": "dribble_tackles_pct",
    "Challenges_Lost": "dribbled_past",
    "Blocks_Blocks": "blocks",
    "Blocks_Sh": "blocked_shots",
    "Blocks_Pass": "blocked_passes",
    "Int": "interceptions",
    "Clr": "clearances",
    "Err": "errors",
}

# Column-name list that is not referenced anywhere else in this cell.
# NOTE(review): kept only in case another cell reads it -- confirm and delete if unused.
defense2 = [
    "tackles", "tackles_won", "tackles_def_3rd", "tackles_mid_3rd", "tackles_att_3rd",
    "dribble_tackles", "dribbles_vs", "dribble_tackles_pct", "dribbled_past", "pressures",
    "pressure_regains", "pressure_regain_pct", "pressures_def_3rd", "pressures_mid_3rd",
    "pressures_att_3rd", "blocks", "blocked_shots", "blocked_shots_saves", "blocked_passes",
    "interceptions", "clearances", "errors",
]

# Team-level defensive stats for the "vs" view (opponent_stats=True).
df_teams_vs_defense = fbref.read_team_season_stats("defense", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_defense.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_defense.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_defense = (
    df_teams_vs_defense
    .rename(columns=teams_defense)
    .drop(columns=["players_used", "90s", "Tkl+Int", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_defense.columns) == 18

logger.info(f"Shape: {df_teams_vs_defense.shape}")
df_teams_vs_defense.to_csv(teams_vs_dir / "teams_vs_defense.csv", index=False)

### possession

In [None]:
# FBref flattened column name -> project column name for the team "possession" table.
teams_possession = {
    "Touches_Touches": "touches",
    "Touches_Def Pen": "touches_def_pen_area",
    "Touches_Def 3rd": "touches_def_3rd",
    "Touches_Mid 3rd": "touches_mid_3rd",
    "Touches_Att 3rd": "touches_att_3rd",
    "Touches_Att Pen": "touches_att_pen_area",
    "Touches_Live": "touches_live_ball",
    "Take-Ons_Att": "dribbles",
    "Take-Ons_Succ": "dribbles_completed",
    "Take-Ons_Succ%": "dribbles_completed_pct",
    "Take-Ons_Tkld": "dribbles_tackled",
    "Take-Ons_Tkld%": "dribbles_tackled_pct",
    "Carries_Carries": "carries",
    "Carries_TotDist": "carry_distance",
    "Carries_PrgDist": "carry_progressive_distance",
    "Carries_PrgC": "progressive_carries",
    "Carries_1/3": "carries_into_final_third",
    "Carries_CPA": "carries_into_penalty_area",
    "Carries_Mis": "miscontrols",
    "Carries_Dis": "dispossessed",
    "Receiving_Rec": "passes_received",
    "Receiving_PrgR": "progressive_passes_received",
}

# Column-name list that is not referenced anywhere else in this cell.
# NOTE(review): kept only in case another cell reads it -- confirm and delete if unused.
possession2 = [
    "touches", "touches_def_pen_area", "touches_def_3rd", "touches_mid_3rd", "touches_att_3rd",
    "touches_att_pen_area", "touches_live_ball", "dribbles_completed", "dribbles", "dribbles_completed_pct",
    "players_dribbled_past", "nutmegs", "carries", "carry_distance", "carry_progressive_distance",
    "progressive_carries", "carries_into_final_third", "carries_into_penalty_area", "pass_targets",
    "passes_received", "passes_received_pct", "miscontrols", "dispossessed",
]

# Team-level possession stats for the "vs" view (opponent_stats=True).
df_team_vs_possession = fbref.read_team_season_stats("possession", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_team_vs_possession.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_team_vs_possession.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_team_vs_possession = (
    df_team_vs_possession
    .rename(columns=teams_possession)
    .drop(columns=["players_used", "Poss", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_team_vs_possession.columns) == 25

logger.info(f"Shape: {df_team_vs_possession.shape}")
df_team_vs_possession.to_csv(teams_vs_dir / "teams_vs_possession.csv", index=False)

### misc

In [None]:
# FBref flattened column name -> project column name for the team "misc" table.
teams_misc = {
    "Performance_CrdY": "cards_yellow",
    "Performance_CrdR": "cards_red",
    "Performance_2CrdY": "cards_yellow_red",
    "Performance_Fls": "fouls",
    "Performance_Fld": "fouled",
    "Performance_Off": "offsides",
    "Performance_Crs": "crosses",
    "Performance_Int": "interceptions",
    "Performance_TklW": "tackles_won",
    "Performance_PKwon": "pens_won",
    "Performance_PKcon": "pens_conceded",
    "Performance_OG": "own_goals",
    "Performance_Recov": "ball_recoveries",
    "Aerial Duels_Won": "aerials_won",
    "Aerial Duels_Lost": "aerials_lost",
    "Aerial Duels_Won%": "aerials_won_pct",
}

# Team-level miscellaneous stats for the "vs" view (opponent_stats=True).
df_teams_vs_misc = fbref.read_team_season_stats("misc", opponent_stats=True)

# Collapse the two-level header into "<group>_<stat>"; an empty second level keeps the bare name.
df_teams_vs_misc.columns = [
    f"{top}_{sub}" if sub else f"{top}"
    for top, sub in df_teams_vs_misc.columns.to_flat_index()
]

# Rename to project names, discard the columns we do not export, and turn the index into columns.
df_teams_vs_misc = (
    df_teams_vs_misc
    .rename(columns=teams_misc)
    .drop(columns=["players_used", "90s", "url"])
    .reset_index()
)

# Width check: fails loudly if the upstream table gains or loses columns.
assert len(df_teams_vs_misc.columns) == 19

logger.info(f"Shape: {df_teams_vs_misc.shape}")
df_teams_vs_misc.to_csv(teams_vs_dir / "teams_vs_misc.csv", index=False)