Python script that scrapes player in-game statistics from fbref.com and merges them with player valuations. The data is extracted from 10 professional soccer leagues around the world.

In [1]:
# libraries
import random
import time
from io import StringIO

import fuzzywuzzy as fuzz
import numpy as np
import pandas as pd
import requests

# Scrape the squad-level standard-stats table for every league/season pair
# from fbref.com and collect the results into two DataFrames:
#   prev_season -> all leagues, season '2021-2022'
#   next_season -> all leagues, season '2022-2023'

# Seasons to download, in the form fbref uses inside its URLs.
years_special = ['2021-2022','2022-2023']

# The three lists below are parallel: position i describes one league
# (fbref competition id, fbref URL suffix, label stored in the 'Based' column).
numbers = [11,20,9,12,13,37,23,32]
leagues = ['-Serie-A-Stats','-Bundesliga-Stats',
           '-Premier-League-Stats','-La-Liga-Stats',
           '-Ligue-1-Stats','-Belgian-Pro-League-Stats','-Eredivisie-Stats',
           '-Primeira-Liga-Stats']

league_names = ['Serie A','Bundesliga','Premier League','La Liga',
               'Ligue 1', 'Jupiler Pro League', 'Eredivisie', 'Liga Nos']

# Fail early if someone edits one list and forgets the others; zip() below
# would otherwise silently drop the unmatched entries.
assert len(numbers) == len(leagues) == len(league_names), \
    'numbers, leagues and league_names must have the same length'

# Default position of the squad table in the list returned by pd.read_html.
table = 2

# Pages whose squad table sits at a different position, keyed by
# (URL suffix, season). NOTE(review): presumably these pages carry extra
# tables ahead of the squad table -- confirm if fbref changes its layout.
table_overrides = {
    ('-Belgian-Pro-League-Stats', '2021-2022'): 4,
    ('-Belgian-Pro-League-Stats', '2022-2023'): 5,
}

# Columns kept from the scraped table, plus the two labels added below.
squad_columns = ['Squad', '# Pl', 'Age', 'Poss', 'MP', 'Starts', 'Min', '90s', 'Gls',
       'Ast', 'G+A', 'G-PK', 'PK', 'PKatt', 'CrdY', 'CrdR', 'xG', 'npxG',
       'xAG', 'npxG+xAG', 'PrgC', 'PrgP', 'Gls.1', 'Ast.1', 'G+A.1', 'G-PK.1',
       'G+A-PK', 'xG.1', 'xAG.1', 'xG+xAG', 'npxG.1', 'npxG+xAG.1', 'Based',
       'Season']

# Per-season accumulators; concatenated once after the loop.
prev_frames = []
next_frames = []

for comp_id, league_slug, league_name in zip(numbers, leagues, league_names):
    for y in years_special:
        url = 'https://fbref.com/en/comps/' + str(comp_id) + '/' + y + '/' + y + league_slug

        # A timeout keeps a stalled connection from hanging the notebook, and
        # raise_for_status() turns a blocked / rate-limited reply into a clear
        # HTTP error instead of an obscure parsing failure further down.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # Strip HTML comment delimiters so markup wrapped in comments is also
        # visible to the parser.
        html = response.text.replace('<!--', '').replace('-->', '')

        # header=1 takes the column names from the second header row.
        # StringIO: passing literal HTML to read_html is deprecated in pandas.
        tables = pd.read_html(StringIO(html), header=1)

        table_index = table_overrides.get((league_slug, y), table)
        if table_index >= len(tables):
            raise IndexError(
                'Expected a table at position ' + str(table_index) + ' on ' + url
                + ' but only ' + str(len(tables)) + ' tables were parsed'
            )

        df = tables[table_index].assign(Based=league_name, Season=y)[squad_columns]

        if y == '2021-2022':
            prev_frames.append(df)
        else:
            next_frames.append(df)

        # Pause between requests to avoid hammering the site.
        time.sleep(3)
        print('Imported squad data for the ' + league_name + ' ' + y + ' season')

# Each league keeps its own 0..n-1 row labels, exactly as scraped.
prev_season = pd.concat(prev_frames)
next_season = pd.concat(next_frames)

Imported squad data for the Serie A 2021-2022 season
Imported squad data for the Serie A 2022-2023 season
Imported squad data for the Bundesliga 2021-2022 season
Imported squad data for the Bundesliga 2022-2023 season
Imported squad data for the Premier League 2021-2022 season
Imported squad data for the Premier League 2022-2023 season
Imported squad data for the La Liga 2021-2022 season
Imported squad data for the La Liga 2022-2023 season
Imported squad data for the Ligue 1 2021-2022 season
Imported squad data for the Ligue 1 2022-2023 season
Imported squad data for the Jupiler Pro League 2021-2022 season
Imported squad data for the Jupiler Pro League 2022-2023 season
Imported squad data for the Eredivisie 2021-2022 season
Imported squad data for the Eredivisie 2022-2023 season
Imported squad data for the Liga Nos 2021-2022 season
Imported squad data for the Liga Nos 2022-2023 season


In [3]:
# Confirm which columns survived the selection applied while scraping.
prev_season.columns

Index(['Squad', '# Pl', 'Age', 'Poss', 'MP', 'Starts', 'Min', '90s', 'Gls',
       'Ast', 'G+A', 'G-PK', 'PK', 'PKatt', 'CrdY', 'CrdR', 'xG', 'npxG',
       'xAG', 'npxG+xAG', 'PrgC', 'PrgP', 'Gls.1', 'Ast.1', 'G+A.1', 'G-PK.1',
       'G+A-PK', 'xG.1', 'xAG.1', 'xG+xAG', 'npxG.1', 'npxG+xAG.1', 'Based',
       'Season'],
      dtype='object')

In [4]:
# Preview the combined 2021-2022 squad table (the notebook display truncates long frames).
prev_season

Unnamed: 0,Squad,# Pl,Age,Poss,MP,Starts,Min,90s,Gls,Ast,...,G+A.1,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1,Based,Season
0,Atalanta,32,27.5,55.0,38,418,3420,38.0,62,48,...,2.89,1.50,2.76,1.64,1.28,2.92,1.53,2.81,Serie A,2021-2022
1,Bologna,36,26.6,50.6,38,418,3420,38.0,43,34,...,2.03,1.03,1.92,1.18,0.88,2.06,1.08,1.96,Serie A,2021-2022
2,Cagliari,33,26.5,44.5,38,418,3420,38.0,34,26,...,1.58,0.82,1.50,1.03,0.78,1.81,0.96,1.74,Serie A,2021-2022
3,Empoli,28,24.5,47.4,38,418,3420,38.0,47,27,...,1.95,1.05,1.76,1.18,0.76,1.94,1.04,1.80,Serie A,2021-2022
4,Fiorentina,28,26.4,57.7,38,418,3420,38.0,59,33,...,2.42,1.32,2.18,1.58,1.03,2.61,1.34,2.37,Serie A,2021-2022
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,Santa Clara,32,26.8,45.8,34,374,3060,34.0,36,22,...,1.71,0.94,1.59,1.13,0.79,1.92,1.04,1.83,Liga Nos,2021-2022
14,Sporting CP,31,26.6,59.8,34,374,3060,34.0,71,45,...,3.41,1.82,3.15,2.09,1.43,3.52,1.84,3.27,Liga Nos,2021-2022
15,Tondela,27,25.4,45.9,34,374,3060,34.0,41,25,...,1.94,0.94,1.68,1.03,0.65,1.68,0.84,1.49,Liga Nos,2021-2022
16,Vitória,31,24.8,48.7,34,374,3060,34.0,50,25,...,2.21,1.18,1.91,1.45,0.93,2.38,1.26,2.19,Liga Nos,2021-2022


In [5]:
# Persist each season's squad table to its own CSV file
# (row labels are written as the first column, as to_csv does by default).
for season_frame, csv_path in (
    (prev_season, 'squads_2021-2022.csv'),
    (next_season, 'squads_2022-2023.csv'),
):
    season_frame.to_csv(csv_path)