Cleaning merged league data from FBref:

In [7]:
# Import libraries:
import pandas as pd
import os

In [12]:
# Global variables:

# League identifiers, kept for league-level iteration:
league_ids = [
    'ENG-Premier League',
    'ESP-La Liga',
    'FRA-Ligue 1',
    'GER-Bundesliga',
    'ITA-Serie A',
]

# Current working directory of the kernel (assumed to be the repository root — confirm):
repo_dir = os.getcwd()

# Path to the "Merged Data" folder inside that directory:
merged_data_dir = os.path.join(repo_dir, "Merged Data")

# Path to the merged Premier League workbook used for experimental cleaning:
epl_file_path = os.path.join(merged_data_dir, "ENG-Premier League_full_merge.xlsx")

In [27]:
# Global functions:


# Function that loads one merged-data Excel workbook:
def import_merged_data(file_path):
    """Read the Excel file at ``file_path`` and return it as a DataFrame.

    Args:
        file_path: Location of the workbook, passed unchanged to
            ``pd.read_excel`` (all other reader options keep their defaults).

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_excel``.
    """
    return pd.read_excel(file_path)


# Function that removes unnamed columns:
def remove_unnamed_cols(df, index_col='league'):
    """Drop "Unnamed" columns and index the frame by ``index_col``.

    A column is treated as unnamed when its label is a string containing
    the text "Unnamed". Labels that are not strings (for example integer
    column names) are always kept.

    Args:
        df: DataFrame to clean. It is not modified; a new frame is returned.
        index_col: Name of the column that becomes the index of the result.
            Defaults to ``'league'``.

    Returns:
        A new DataFrame without the unnamed columns, indexed by ``index_col``.

    Raises:
        KeyError: If ``index_col`` is not a column of ``df``.
    """
    # Create list of unnamed columns. The isinstance guard matters: the `in`
    # test on a non-string label such as an integer would raise a TypeError.
    unnamed_cols = [
        col for col in df.columns
        if isinstance(col, str) and "Unnamed" in col
    ]

    # Create a new df and set index:
    new_df = (df
              .drop(columns=unnamed_cols)
              .set_index(index_col)
              )

    return new_df

In [14]:
# Import the EPL data for experimental cleaning:
# (reads the workbook at `epl_file_path` into a DataFrame via import_merged_data)
epl_df = import_merged_data(epl_file_path)

In [28]:
# Make an independent deep copy of the loaded dataframe to experiment on, so that
# `epl_df` stays exactly as loaded (presumably to avoid re-reading the Excel file):
epl_df_copy = epl_df.copy(deep=True)

In [29]:
# Remove all unnamed columns from the dataframe (the result is indexed by 'league'):
clean_epl_df_copy = remove_unnamed_cols(epl_df_copy)
clean_epl_df_copy  # Bare last expression so the notebook displays the cleaned frame

Unnamed: 0_level_0,season,team,player,nationality,position,age,YOB,MP,Starts,Min,...,onG,onGA,+/-,+/-90,On-Off,onxG,onxGA,xG+/-,xG+/-90,On-Off.1
league,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ENG-Premier League,1718,Arsenal,Aaron Ramsey,WAL,MF,26.0,1990.0,24.0,21.0,1846.0,...,42.0,25.0,17.0,0.83,0.49,42.8,23.0,19.8,0.97,0.93
ENG-Premier League,1718,Arsenal,Ainsley Maitland-Niles,ENG,"DF,MF",19.0,1997.0,15.0,8.0,914.0,...,17.0,16.0,1.0,0.10,-0.69,14.3,14.3,0.0,0.00,-0.74
ENG-Premier League,1718,Arsenal,Alex Iwobi,NGA,"MF,FW",21.0,1996.0,26.0,22.0,1830.0,...,36.0,26.0,10.0,0.49,-0.24,35.6,25.5,10.2,0.50,-0.08
ENG-Premier League,1718,Arsenal,Alex Oxlade-Chamberlain,ENG,DF,23.0,1993.0,3.0,3.0,241.0,...,4.0,7.0,-3.0,-1.12,-1.86,4.2,4.6,-0.4,-0.14,-0.74
ENG-Premier League,1718,Arsenal,Alexandre Lacazette,FRA,FW,26.0,1991.0,32.0,26.0,2202.0,...,51.0,28.0,23.0,0.94,0.94,46.9,27.4,19.5,0.80,0.72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ENG-Premier League,2223,Wolves,Rúben Neves,POR,MF,25.0,1997.0,35.0,33.0,3019.0,...,26.0,48.0,-22.0,-0.66,0.47,32.5,51.7,-19.2,-0.57,0.29
ENG-Premier League,2223,Wolves,Sasa Kalajdzic,AUT,FW,25.0,1997.0,1.0,1.0,45.0,...,1.0,0.0,1.0,2.00,2.75,0.6,0.6,0.0,0.04,0.65
ENG-Premier League,2223,Wolves,Toti Gomes,POR,DF,23.0,1999.0,17.0,10.0,984.0,...,10.0,14.0,-4.0,-0.37,0.48,10.1,21.3,-11.2,-1.03,-0.59
ENG-Premier League,2223,Wolves,Willy Boly,,,,,,,,...,,,,,,,,,,
