# Analysis: Bakayoko replacement

In [None]:
# Analysis: Bakayoko replacement
# Name: Yasin Tunçbilek
# Date: 9 February 2024

In [1]:
# Library to send HTTP requests
import requests

In [7]:
# Entry page of the Eredivisie stats on FBref (host + path to the stats overview)
eredivisie_url = (
    "https://fbref.com"
    "/en/comps/23/stats/Eredivisie-Stats"
)

In [None]:
# Download the HTML of the Eredivisie stats page.
# The timeout keeps the cell from hanging indefinitely when the server does not
# respond; raise_for_status() turns an HTTP error (4xx/5xx) into an exception
# here, so later cells never parse an error page by mistake.
data = requests.get(eredivisie_url, timeout=30)
data.raise_for_status()

In [12]:
# Library to parse HTML
from bs4 import BeautifulSoup

In [38]:
# Parse the downloaded HTML.
# The parser is named explicitly: without it Beautiful Soup guesses one from
# whatever is installed (and warns about it), so results could differ between
# machines. "html.parser" ships with Python and needs no extra install.
soup = BeautifulSoup(data.text, "html.parser")

In [65]:
# Select the menu item that holds the links to the various stats pages.
# select_one() returns None when nothing matches, so a changed page layout is
# reported with a clear message instead of a bare IndexError.
menu_stats = soup.select_one('li.full.current.hasmore')
if menu_stats is None:
    raise ValueError("Stats menu ('li.full.current.hasmore') not found in the downloaded page")

In [66]:
# Find all link tags inside the stats menu.
# href=True keeps only anchors that actually carry an href attribute, so the
# following cells never turn a missing link into the text "None".
urls_stats = menu_stats.find_all('a', href=True)

In [68]:
# Reduce every link tag to the value of its href attribute
urls_stats = [anchor.get("href") for anchor in urls_stats]

In [69]:
# Prefix every site-relative path with the FBref host to get absolute URLs
urls_stats = [f"https://fbref.com{path}" for path in urls_stats]

In [70]:
urls_stats

['https://fbref.com/en/comps/23/stats/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/keepers/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/keepersadv/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/shooting/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/passing/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/passing_types/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/gca/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/defense/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/possession/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/playingtime/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/misc/Eredivisie-Stats']

In [301]:
# URL of the shooting stats page.
# Looked up by its path segment rather than by list position, so a reordered
# or extended menu cannot silently select a different stats page.
shooting_url = next((url for url in urls_stats if "/shooting/" in url), None)
if shooting_url is None:
    raise ValueError("No shooting stats URL found in urls_stats")

In [302]:
# Download the HTML of the shooting page.
# The timeout prevents an indefinite hang and raise_for_status() stops the
# notebook on an HTTP error instead of parsing an error page.
response = requests.get(shooting_url, timeout=30)
response.raise_for_status()

# The player table is wrapped in an HTML comment, so the comment markers are
# removed to make the table visible to the parser.
data = response.text.replace('<!--', '').replace('-->', '')

In [303]:
# Library to deal with various types of I/O. Needed because read_html gives an error.
from io import StringIO

In [304]:
# read_html expects a file-like object rather than a literal HTML string,
# so the downloaded text is wrapped in an in-memory text buffer
data = StringIO(initial_value=data)

In [305]:
# Pandas library for data analysis and manipulation
import pandas as pd

In [306]:
# Build the player shooting dataframe from the table whose HTML id is
# 'stats_shooting'; read_html returns a list of tables, the first match is used
player_shooting_stats = pd.read_html(
    io=data,
    attrs={"id": "stats_shooting"},
)[0]

In [307]:
# The table has a multi-level header; discard the outermost level (level 0)
# so the columns can be addressed by their plain names
player_shooting_stats.columns = player_shooting_stats.columns.droplevel(level=0)

In [308]:
# The header row is repeated inside the table body; those rows carry the text
# 'Player' in the Player column and are filtered out here.
# NOTE(review): the original index labels are kept, so the index has gaps after this step.
player_shooting_stats = player_shooting_stats[
    player_shooting_stats["Player"].ne("Player")
]

In [319]:
player_shooting_stats

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches
0,1,Patrick van Aanholt,nl NED,DF,PSV Eindhoven,33-166,1990,10.7,0,22,...,20.5,1,0,0,1.6,1.6,0.07,-1.6,-1.6,Matches
1,2,Paxten Aaronson,us USA,"FW,MF",Vitesse,20-169,2003,1.0,0,2,...,25.3,0,0,0,0.1,0.1,0.04,-0.1,-0.1,Matches
2,3,Jayden Addai,gh GHA,FW,AZ Alkmaar,18-169,2005,2.7,0,4,...,23.6,0,0,0,0.1,0.1,0.02,-0.1,-0.1,Matches
3,4,Bobby Adekanye,nl NED,"FW,MF",Go Ahead Eag,24-362,1999,11.1,0,19,...,19.6,0,0,0,1.3,1.3,0.07,-1.3,-1.3,Matches
4,5,Shawn Adewoye,be BEL,DF,RKC Waalwijk,23-227,2000,16.1,0,12,...,11.6,0,0,0,1.0,1.0,0.08,-1.0,-1.0,Matches
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
497,479,Lequincio Zeefuik,nl NED,"FW,DF",AZ Alkmaar,19-077,2004,0.3,0,1,...,32.1,0,0,0,0.0,0.0,0.02,0.0,0.0,Matches
498,480,Lequincio Zeefuik,nl NED,"FW,MF",Volendam,19-077,2004,8.6,3,19,...,17.0,0,0,0,1.9,1.9,0.10,+1.1,+1.1,Matches
499,481,Ramiz Zerrouki,dz ALG,MF,Feyenoord,25-261,1998,6.2,0,6,...,20.6,0,0,0,0.2,0.2,0.04,-0.2,-0.2,Matches
500,482,Giovanni van Zwam,nl NED,DF,Vitesse,20-040,2004,2.9,0,1,...,9.1,0,0,0,0.1,0.1,0.12,-0.1,-0.1,Matches


In [309]:
# Show first five rows of dataframe
player_shooting_stats.head()

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches
0,1,Patrick van Aanholt,nl NED,DF,PSV Eindhoven,33-166,1990,10.7,0,22,...,20.5,1,0,0,1.6,1.6,0.07,-1.6,-1.6,Matches
1,2,Paxten Aaronson,us USA,"FW,MF",Vitesse,20-169,2003,1.0,0,2,...,25.3,0,0,0,0.1,0.1,0.04,-0.1,-0.1,Matches
2,3,Jayden Addai,gh GHA,FW,AZ Alkmaar,18-169,2005,2.7,0,4,...,23.6,0,0,0,0.1,0.1,0.02,-0.1,-0.1,Matches
3,4,Bobby Adekanye,nl NED,"FW,MF",Go Ahead Eag,24-362,1999,11.1,0,19,...,19.6,0,0,0,1.3,1.3,0.07,-1.3,-1.3,Matches
4,5,Shawn Adewoye,be BEL,DF,RKC Waalwijk,23-227,2000,16.1,0,12,...,11.6,0,0,0,1.0,1.0,0.08,-1.0,-1.0,Matches


In [310]:
# Download the HTML of the shooting page again, this time keeping the
# response object (its unmodified text is parsed in the next cell).
# The timeout prevents an indefinite hang and raise_for_status() stops the
# notebook on an HTTP error instead of parsing an error page.
data = requests.get(shooting_url, timeout=30)
data.raise_for_status()

In [311]:
# Create the dataframe of team shooting stats from the table that matches
# "Squad Shooting" (the first match returned by read_html is used).
# Passing a literal HTML string to read_html is deprecated and triggers a
# warning; wrapping the text in StringIO is the supported way to parse
# in-memory HTML.
team_shooting_stats = pd.read_html(StringIO(data.text), match="Squad Shooting")[0]

  team_shooting_stats = pd.read_html(data.text, match = "Squad Shooting")[0]


In [312]:
# The team table also has a multi-level header; discard the outermost level
# (level 0) so the columns can be addressed by their plain names
team_shooting_stats.columns = team_shooting_stats.columns.droplevel(level=0)

In [313]:
# Show first five rows of dataframe
team_shooting_stats.head()

Unnamed: 0,Squad,# Pl,90s,Gls,Sh,SoT,SoT%,Sh/90,SoT/90,G/Sh,G/SoT,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG
0,Ajax,32,20.0,46,342,134,39.2,17.1,6.7,0.12,0.31,16.6,5,4,5,43.6,39.7,0.12,2.4,2.3
1,Almere City,26,21.0,20,236,65,27.5,11.24,3.1,0.08,0.28,18.0,8,2,2,23.4,21.9,0.1,-3.4,-3.9
2,AZ Alkmaar,31,21.0,38,289,104,36.0,13.76,4.95,0.12,0.35,16.8,13,2,2,34.9,33.4,0.12,3.1,2.6
3,Excelsior,24,21.0,31,230,88,38.3,10.95,4.19,0.13,0.35,16.1,7,0,0,28.0,28.0,0.13,3.0,3.0
4,Feyenoord,25,20.0,53,403,153,38.0,20.15,7.65,0.13,0.34,16.5,14,1,3,53.7,51.3,0.13,-0.7,0.7


In [314]:
# Names of the columns that should receive a prefix: every column except the
# "Squad" key. Only the names are needed, so a plain list is built instead of
# copying the whole dataframe; membership checks (`name in cols`) still work.
cols = [name for name in team_shooting_stats.columns if name != "Squad"]

In [315]:
# Prefix the team columns selected in `cols` with "team_" so they cannot be
# confused with the equally named player columns; "Squad" keeps its name
team_shooting_stats = team_shooting_stats.rename(
    columns={
        name: "team_" + name
        for name in team_shooting_stats.columns
        if name in cols
    }
)

In [316]:
team_shooting_stats

Unnamed: 0,Squad,team_# Pl,team_90s,team_Gls,team_Sh,team_SoT,team_SoT%,team_Sh/90,team_SoT/90,team_G/Sh,team_G/SoT,team_Dist,team_FK,team_PK,team_PKatt,team_xG,team_npxG,team_npxG/Sh,team_G-xG,team_np:G-xG
0,Ajax,32,20.0,46,342,134,39.2,17.1,6.7,0.12,0.31,16.6,5,4,5,43.6,39.7,0.12,2.4,2.3
1,Almere City,26,21.0,20,236,65,27.5,11.24,3.1,0.08,0.28,18.0,8,2,2,23.4,21.9,0.1,-3.4,-3.9
2,AZ Alkmaar,31,21.0,38,289,104,36.0,13.76,4.95,0.12,0.35,16.8,13,2,2,34.9,33.4,0.12,3.1,2.6
3,Excelsior,24,21.0,31,230,88,38.3,10.95,4.19,0.13,0.35,16.1,7,0,0,28.0,28.0,0.13,3.0,3.0
4,Feyenoord,25,20.0,53,403,153,38.0,20.15,7.65,0.13,0.34,16.5,14,1,3,53.7,51.3,0.13,-0.7,0.7
5,Fortuna Sittard,28,20.0,20,256,79,30.9,12.8,3.95,0.07,0.23,18.6,11,2,6,25.5,20.8,0.08,-5.5,-2.8
6,Go Ahead Eag,22,20.0,34,283,119,42.0,14.15,5.95,0.11,0.26,17.6,8,3,3,29.5,27.1,0.1,4.5,3.9
7,Heerenveen,24,20.0,30,274,91,33.2,13.7,4.55,0.09,0.29,17.8,6,4,4,28.8,25.7,0.1,1.2,0.3
8,Heracles Almelo,27,21.0,29,243,81,33.3,11.57,3.86,0.11,0.33,17.4,4,2,3,28.6,26.3,0.11,0.4,0.7
9,NEC Nijmegen,28,21.0,39,258,110,42.6,12.29,5.24,0.14,0.32,17.6,7,4,4,28.3,25.1,0.1,10.7,9.9


In [317]:
# Attach the team totals (goals, shots, shots on target) to every player row.
# A left join on "Squad" keeps all player rows. validate="many_to_one" makes
# pandas raise if a squad appears more than once in the team table, which
# would otherwise silently duplicate player rows.
merged_stats = player_shooting_stats.merge(
    team_shooting_stats[["Squad", "team_Gls", "team_Sh", "team_SoT"]],
    on="Squad",
    how="left",
    validate="many_to_one",
)

In [318]:
merged_stats

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches,team_Gls,team_Sh,team_SoT
0,1,Patrick van Aanholt,nl NED,DF,PSV Eindhoven,33-166,1990,10.7,0,22,...,0,1.6,1.6,0.07,-1.6,-1.6,Matches,62,455,169
1,2,Paxten Aaronson,us USA,"FW,MF",Vitesse,20-169,2003,1.0,0,2,...,0,0.1,0.1,0.04,-0.1,-0.1,Matches,15,237,80
2,3,Jayden Addai,gh GHA,FW,AZ Alkmaar,18-169,2005,2.7,0,4,...,0,0.1,0.1,0.02,-0.1,-0.1,Matches,38,289,104
3,4,Bobby Adekanye,nl NED,"FW,MF",Go Ahead Eag,24-362,1999,11.1,0,19,...,0,1.3,1.3,0.07,-1.3,-1.3,Matches,34,283,119
4,5,Shawn Adewoye,be BEL,DF,RKC Waalwijk,23-227,2000,16.1,0,12,...,0,1.0,1.0,0.08,-1.0,-1.0,Matches,19,246,81
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478,479,Lequincio Zeefuik,nl NED,"FW,DF",AZ Alkmaar,19-077,2004,0.3,0,1,...,0,0.0,0.0,0.02,0.0,0.0,Matches,38,289,104
479,480,Lequincio Zeefuik,nl NED,"FW,MF",Volendam,19-077,2004,8.6,3,19,...,0,1.9,1.9,0.10,+1.1,+1.1,Matches,21,188,66
480,481,Ramiz Zerrouki,dz ALG,MF,Feyenoord,25-261,1998,6.2,0,6,...,0,0.2,0.2,0.04,-0.2,-0.2,Matches,53,403,153
481,482,Giovanni van Zwam,nl NED,DF,Vitesse,20-040,2004,2.9,0,1,...,0,0.1,0.1,0.12,-0.1,-0.1,Matches,15,237,80
