In [67]:
# Per-90 switch: keep 'Y' to convert the downloaded stats into p90, set to 'N' to skip that step.
convert_into_p90 = 'Y'
# Folder into which the files are downloaded; provide your own location here.
path = r'D:\Data Analysis TP\2021_22\FBREF Top 5'

In [68]:
#Thanks a lot to: 
#1. Statsbomb/FBref for publicly sharing stats 
#2. Shreyas Khatri (Twitter: @khatri_shreyas), whose code I used to download the files from FBref.
#OG link: https://github.com/shreyas7kha/FootballFbrefPlotting

import os
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup as soup

from datetime import date
  
# Work from the configured base folder so the dated directory is created inside it.
os.chdir(path)

# Current local date; str(today) renders as YYYY-MM-DD and becomes part of the folder name.
today = date.today()

# Build the dated download folder. os.path.join replaces the former '\F' literal
# (an invalid escape sequence that newer Python versions warn about), and
# exist_ok=True lets the notebook be re-run on the same day without FileExistsError.
path = os.path.join(os.getcwd(), 'FBREF Files for ' + str(today))
os.makedirs(path, exist_ok=True)

# A function which can read data from a Fbref webpage
def readfromhtml(filepath):
    """Read the first table of an FBref page and return it as a cleaned DataFrame.

    Parameters
    ----------
    filepath : str
        URL or path handed to ``pd.read_html``; only the first table found is used.

    Returns
    -------
    pandas.DataFrame
        The table indexed by ``Rk``, with flattened column names, missing values
        replaced by ``'0'`` and every column that parses as numbers converted to
        a numeric dtype. Columns that do not parse are left unchanged.

    Raises
    ------
    KeyError
        If the table has no ``Player`` or ``Rk`` column.
    """
    df = pd.read_html(filepath)[0]

    # Each column label is indexed as (col[0], col[1]) -- this assumes a two-level
    # header. The second level is used as the name; when that name was already
    # taken, the first level is prefixed to tell the columns apart.
    unique_col_names = []
    for col in df.columns:
        if col[1] not in unique_col_names:
            unique_col_names.append(col[1])
        else:
            unique_col_names.append(col[0] + ' ' + col[1])
    df.columns = unique_col_names

    # Drop rows whose Player cell is the literal 'Player' (presumably header rows
    # repeated inside the table body), then fill gaps and index by rank.
    df = df[df['Player'] != 'Player']
    df = df.fillna('0').set_index('Rk', drop=True)

    try:
        # Comp: drop the first whitespace-separated token, keep the rest.
        df['Comp'] = df['Comp'].apply(lambda x: ' '.join(x.split()[1:]))
        # Nation: keep only the last whitespace-separated token.
        df['Nation'] = df['Nation'].astype(str).apply(lambda x: x.split()[-1])
    except Exception as err:
        # Best effort: a table without these columns (or with unexpected values)
        # is still returned. Only ordinary errors are caught, so Ctrl-C and
        # SystemExit are no longer swallowed as they were by the bare except
        # combined with a return inside finally.
        print('Error in uploading file:' + str(filepath) + ' (' + repr(err) + ')')

    def _to_numeric_or_keep(column):
        # Explicit replacement for the deprecated pd.to_numeric(errors='ignore').
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    return df.apply(_to_numeric_or_keep)


# If you want all data for the big 5 leagues, you just need to run this function with
# the filepath where you want to save all the files
# def save_all_csvs(base_url='https://fbref.com/en/comps/Big5/Big-5-European-Leagues-Stats',
#                   filepath=os.getcwd()):
def save_all_csvs(base_url='https://fbref.com/en/comps/Big5/Big-5-European-Leagues-Stats',
                  filepath=path):
    """Download every table linked from an FBref overview page and save each as CSV.

    Parameters
    ----------
    base_url : str
        Overview page whose link list is scanned for the individual table pages.
    filepath : str
        Existing folder that receives the CSV files. The default is the value the
        module-level ``path`` had when this function was defined.

    Raises
    ------
    requests.HTTPError
        If the overview page answers with an error status.
    IndexError
        If the page has fewer than five ``<ul>`` elements, or a linked URL has
        fewer than seven '/'-separated parts.
    """
    # Local import keeps this cell self-contained; urljoin is standard library.
    from urllib.parse import urljoin

    # A timeout prevents the cell from hanging forever; raise_for_status stops
    # early instead of parsing an error page.
    req = requests.get(base_url, timeout=30)
    req.raise_for_status()
    parse_soup = soup(req.content, 'lxml')

    # Position-dependent: the fifth <ul> on the page is taken as the list of
    # table links (NOTE(review): presumably the stat-category menu -- confirm if
    # FBref changes its layout).
    url_list = parse_soup.find_all('ul')[4]

    # Resolve each href against base_url rather than slicing the first 17
    # characters of it, which only worked for the 'https://fbref.com' host.
    urls = [urljoin(base_url, link['href']) for link in url_list.find_all('a', href=True)]

    for url in urls:
        df = readfromhtml(url)
        # The seventh '/'-separated part of the URL is used as the file name.
        filename = url.split('/')[6]
        target = os.path.join(filepath, filename + '.csv')
        try:
            df.to_csv(target, encoding='utf-8-sig')
        except OSError as err:
            print('An error occurred in saving the file {0}: {1}'.format(target, err))
        else:
            print('File has been saved as {0} at {1} in format YYYY-MM-DD '.format(filename, filepath))

In [69]:
# Run the download with the default overview URL and the default output folder.
save_all_csvs()

File has been saved as stats at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as keepers at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as keepersadv at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as shooting at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as passing at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as passing_types at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as gca at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has been saved as defense at D:\Data Analysis TP\2021_22\FBREF Top 5\FBREF Files for 2022-01-07 in format YYYY-MM-DD 
File has bee

In [71]:
def _add_p90_columns(stats_df):
    """Return ``stats_df`` extended with a '<column> p90' version of its counting stats.

    Source columns are those after '90s', excluding the last column. A column is
    skipped when its name contains 'p90', '%' or '/90', or when it is not numeric.
    Each new column is the source divided by '90s', rounded to two decimals.

    Raises
    ------
    KeyError
        If the frame has no '90s' column.
    """
    cols = stats_df.columns
    # Locate '90s' by name instead of assuming it is always the ninth column;
    # with the fixed offset, tables where '90s' sits further right produced
    # columns such as '90s p90'.
    first_stat = cols.get_loc('90s') + 1
    # A zero denominator becomes NaN so the result is empty rather than inf.
    nineties = stats_df['90s'].where(stats_df['90s'] != 0)

    per90 = {}
    for col in cols[first_stat:-1]:  # the last column ('Matches') is left out
        if 'p90' in col or '%' in col or '/90' in col:
            continue
        if not pd.api.types.is_numeric_dtype(stats_df[col]):
            continue
        per90[col + ' p90'] = (stats_df[col] / nineties).round(2)

    if not per90:
        return stats_df.copy()
    # Attach all new columns at once instead of inserting them one by one.
    return pd.concat([stats_df, pd.DataFrame(per90)], axis=1)


if convert_into_p90 == 'Y':
    import glob

    os.chdir(path)
    extension = 'csv'
    # Ignore files written by an earlier run of this cell; otherwise a re-run
    # would convert them again and create '... p90 p90.csv'.
    all_filenames = [name for name in glob.glob('*.{}'.format(extension))
                     if not name.endswith(' p90.' + extension)]

    print(all_filenames)

    for file in all_filenames:
        print('Processing file:', file)
        df = _add_p90_columns(pd.read_csv(file))
        p90_path = os.path.join(path, os.path.splitext(file)[0] + ' p90.csv')
        # Same encoding as the raw downloads so both sets of files open alike.
        df.to_csv(p90_path, encoding='utf-8-sig')

    print('All stats are now converted to p90')

['defense.csv', 'gca.csv', 'keepers.csv', 'keepersadv.csv', 'misc.csv', 'passing.csv', 'passing_types.csv', 'playingtime.csv', 'possession.csv', 'shooting.csv', 'stats.csv']
Processing file: defense.csv
Processing file: gca.csv
Processing file: keepers.csv
Processing file: keepersadv.csv
Processing file: misc.csv
Processing file: passing.csv
Processing file: passing_types.csv
Processing file: playingtime.csv
Processing file: possession.csv
Processing file: shooting.csv
Processing file: stats.csv
