# Analysis: Bakayoko replacement

In [None]:
# Analysis: Bakayoko replacement
# Name: Yasin Tunçbilek
# Date: 9 February 2024

In [1]:
# Library to send HTTP requests
import requests

In [7]:
# URL of Eredivisie stats
eredivisie_url = "https://fbref.com/en/comps/23/stats/Eredivisie-Stats"

In [None]:
# Download HTML of Eredivisie page
data = requests.get(eredivisie_url)

In [12]:
# Library to parse HTML
from bs4 import BeautifulSoup

In [38]:
# Initialise library with downloaded HTML
soup = BeautifulSoup(data.text)

In [65]:
# Select menu where URLs to various stats reside in
menu_stats = soup.select('li.full.current.hasmore')[0]

In [66]:
# Find all tags where URLs to various stats reside in
urls_stats = menu_stats.find_all('a')

In [68]:
# Get href property of each link
urls_stats = [u.get("href") for u in urls_stats]

In [69]:
# Make full URLs
urls_stats = [f"https://fbref.com{l}" for l in urls_stats]

In [70]:
urls_stats

['https://fbref.com/en/comps/23/stats/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/keepers/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/keepersadv/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/shooting/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/passing/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/passing_types/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/gca/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/defense/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/possession/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/playingtime/Eredivisie-Stats',
 'https://fbref.com/en/comps/23/misc/Eredivisie-Stats']

In [71]:
# URL of shooting stats
shooting_url = urls_stats[3]

In [248]:
# Download HTML of shooting page. Table is commented out. Therefore a replace.
data = requests.get(shooting_url).text.replace('<!--','').replace('-->','')

In [249]:
# Library to deal with various types of I/O. Needed because read_html gives an error.
from io import StringIO

In [250]:
# Wrap in a 'StringIO' object to read from a literal string
data = StringIO(data)

In [83]:
# Pandas library for data analysis and manipulation
import pandas as pd

In [251]:
# Create dataframe of player shooting stats
player_shooting_stats = pd.read_html(data, attrs = {'id':'stats_shooting'})[0]

In [252]:
# Drop the first level of the dataframe
player_shooting_stats.columns = player_shooting_stats.columns.droplevel()

In [253]:
# Show first five rows of dataframe
player_shooting_stats.head()

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches
0,1,Patrick van Aanholt,nl NED,DF,PSV Eindhoven,33-165,1990,10.7,0,22,...,20.5,1,0,0,1.6,1.6,0.07,-1.6,-1.6,Matches
1,2,Jayden Addai,gh GHA,FW,AZ Alkmaar,18-168,2005,2.0,0,2,...,16.8,0,0,0,0.1,0.1,0.03,-0.1,-0.1,Matches
2,3,Bobby Adekanye,nl NED,"FW,MF",Go Ahead Eag,24-361,1999,11.1,0,19,...,19.6,0,0,0,1.3,1.3,0.07,-1.3,-1.3,Matches
3,4,Shawn Adewoye,be BEL,DF,RKC Waalwijk,23-226,2000,16.1,0,12,...,11.6,0,0,0,1.0,1.0,0.08,-1.0,-1.0,Matches
4,5,Nikolas Agrafiotis,nl NED,FW,Excelsior,23-291,2000,8.4,5,29,...,14.7,0,0,0,4.0,4.0,0.14,1.0,1.0,Matches


In [275]:
# Download HTML of shooting page
data = requests.get(shooting_url)

In [276]:
# Create dataframe of team shooting stats of first table on the page. Note: could not solve StringIO error-message here.
team_shooting_stats = pd.read_html(data.text, match = "Squad Shooting")[0]

  team_shooting_stats = pd.read_html(data.text, match = "Squad Shooting")[0]


In [277]:
# Drop the first level of the dataframe
team_shooting_stats.columns = team_shooting_stats.columns.droplevel()

In [278]:
# Show first five rows of dataframe
team_shooting_stats.head()

Unnamed: 0,Squad,# Pl,90s,Gls,Sh,SoT,SoT%,Sh/90,SoT/90,G/Sh,G/SoT,Dist,FK,PK,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG
0,Ajax,32,20.0,46,342,134,39.2,17.1,6.7,0.12,0.31,16.6,5,4,5,43.6,39.7,0.12,2.4,2.3
1,Almere City,26,20.0,20,225,64,28.4,11.25,3.2,0.08,0.28,17.9,8,2,2,23.0,21.4,0.1,-3.0,-3.4
2,AZ Alkmaar,31,20.0,38,279,101,36.2,13.95,5.05,0.13,0.36,16.7,11,2,2,33.9,32.4,0.12,4.1,3.6
3,Excelsior,23,20.0,31,218,85,39.0,10.9,4.25,0.14,0.36,16.0,6,0,0,27.1,27.1,0.13,3.9,3.9
4,Feyenoord,25,20.0,53,403,153,38.0,20.15,7.65,0.13,0.34,16.5,14,1,3,53.7,51.3,0.13,-0.7,0.7


In [279]:
# Specify columns to add a prefix to
cols = team_shooting_stats.loc[:, team_shooting_stats.columns != "Squad"]

In [280]:
# Add "team_" in front of team variables to distinguish between player stats
team_shooting_stats = team_shooting_stats.rename(columns = {c: 'team_' + c for c in team_shooting_stats.columns if c in cols})

In [282]:
team_shooting_stats

Unnamed: 0,Squad,team_# Pl,team_90s,team_Gls,team_Sh,team_SoT,team_SoT%,team_Sh/90,team_SoT/90,team_G/Sh,team_G/SoT,team_Dist,team_FK,team_PK,team_PKatt,team_xG,team_npxG,team_npxG/Sh,team_G-xG,team_np:G-xG
0,Ajax,32,20.0,46,342,134,39.2,17.1,6.7,0.12,0.31,16.6,5,4,5,43.6,39.7,0.12,2.4,2.3
1,Almere City,26,20.0,20,225,64,28.4,11.25,3.2,0.08,0.28,17.9,8,2,2,23.0,21.4,0.1,-3.0,-3.4
2,AZ Alkmaar,31,20.0,38,279,101,36.2,13.95,5.05,0.13,0.36,16.7,11,2,2,33.9,32.4,0.12,4.1,3.6
3,Excelsior,23,20.0,31,218,85,39.0,10.9,4.25,0.14,0.36,16.0,6,0,0,27.1,27.1,0.13,3.9,3.9
4,Feyenoord,25,20.0,53,403,153,38.0,20.15,7.65,0.13,0.34,16.5,14,1,3,53.7,51.3,0.13,-0.7,0.7
5,Fortuna Sittard,28,20.0,20,256,79,30.9,12.8,3.95,0.07,0.23,18.6,11,2,6,25.5,20.8,0.08,-5.5,-2.8
6,Go Ahead Eag,22,20.0,34,283,119,42.0,14.15,5.95,0.11,0.26,17.6,8,3,3,29.5,27.1,0.1,4.5,3.9
7,Heerenveen,24,20.0,30,274,91,33.2,13.7,4.55,0.09,0.29,17.8,6,4,4,28.8,25.7,0.1,1.2,0.3
8,Heracles Almelo,26,20.0,26,223,73,32.7,11.15,3.65,0.11,0.33,17.6,4,2,3,26.4,24.1,0.11,-0.4,-0.1
9,NEC Nijmegen,28,21.0,39,258,110,42.6,12.29,5.24,0.14,0.32,17.5,7,4,4,28.3,25.1,0.1,10.7,9.9


In [286]:
# Merge both dataframes into one dataframe
merged_stats = player_shooting_stats.merge(team_shooting_stats[["Squad", "team_Gls", "team_Sh", "team_SoT"]], on = 'Squad', how = 'left')

In [287]:
merged_stats

Unnamed: 0,Rk,Player,Nation,Pos,Squad,Age,Born,90s,Gls,Sh,...,PKatt,xG,npxG,npxG/Sh,G-xG,np:G-xG,Matches,team_Gls,team_Sh,team_SoT
0,1,Patrick van Aanholt,nl NED,DF,PSV Eindhoven,33-165,1990,10.7,0,22,...,0,1.6,1.6,0.07,-1.6,-1.6,Matches,62.0,455.0,169.0
1,2,Jayden Addai,gh GHA,FW,AZ Alkmaar,18-168,2005,2.0,0,2,...,0,0.1,0.1,0.03,-0.1,-0.1,Matches,38.0,279.0,101.0
2,3,Bobby Adekanye,nl NED,"FW,MF",Go Ahead Eag,24-361,1999,11.1,0,19,...,0,1.3,1.3,0.07,-1.3,-1.3,Matches,34.0,283.0,119.0
3,4,Shawn Adewoye,be BEL,DF,RKC Waalwijk,23-226,2000,16.1,0,12,...,0,1.0,1.0,0.08,-1.0,-1.0,Matches,19.0,246.0,81.0
4,5,Nikolas Agrafiotis,nl NED,FW,Excelsior,23-291,2000,8.4,5,29,...,0,4.0,4.0,0.14,+1.0,+1.0,Matches,31.0,218.0,85.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494,476,Lequincio Zeefuik,nl NED,DF,AZ Alkmaar,19-076,2004,0.1,0,1,...,0,0.0,0.0,0.02,0.0,0.0,Matches,38.0,279.0,101.0
495,477,Lequincio Zeefuik,nl NED,"FW,MF",Volendam,19-076,2004,8.6,3,19,...,0,1.9,1.9,0.10,+1.1,+1.1,Matches,21.0,188.0,66.0
496,478,Ramiz Zerrouki,dz ALG,MF,Feyenoord,25-260,1998,6.2,0,6,...,0,0.2,0.2,0.04,-0.2,-0.2,Matches,53.0,403.0,153.0
497,479,Giovanni van Zwam,nl NED,DF,Vitesse,20-039,2004,2.6,0,1,...,0,0.1,0.1,0.12,-0.1,-0.1,Matches,13.0,225.0,76.0
