# FBRef Scraping Notebook
**Author:** Jake Thomas
**Last Modified:** 11/20/2024 (created: 11/17/2024)

**Description:** Scraping the FBref website and importing it into a pandas dataframe.

In [1]:
# import libraries
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By # used to import different ways to access data in the XML or HTML file
from selenium.webdriver.chrome.service import Service # no longer need to download a driver file, use service
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager # used to manage the Chrome driver to emulate a Chrome web browser
import time


In [2]:
# Scrape data

browser = webdriver.Chrome()

players = []
nations = []
positions = []
clubs = []
leagues = [] 
ages = []
matches_played = [] 
goals = []
assists = []
xG = []


url = "https://fbref.com/en/comps/Big5/2023-2024/stats/players/2023-2024-Big-5-European-Leagues-Stats"

browser.get(url)
browser.maximize_window()

time.sleep(5)

next_button = browser.find_element(By.ID, "stats_standard_control")

# Scroll to the button
browser.execute_script("arguments[0].scrollIntoView(true);", next_button)

browser.execute_script("arguments[0].click();", next_button)

player_elements = browser.find_elements(By.XPATH, '//td[@data-stat="player"]')

for player in player_elements:     # append quote text from this page to the quotes list
    players.append(player.text)


nation_elements = browser.find_elements(By.XPATH, '//td[@data-stat="nationality"]')

for nation in nation_elements:     # append quote text from this page to the quotes list
    nations.append(nation.text)


position_elements = browser.find_elements(By.XPATH, '//td[@data-stat="position"]')

for position in position_elements:     # append quote text from this page to the quotes list
    positions.append(position.text)


club_elements = browser.find_elements(By.XPATH, '//td[@data-stat="team"]')

for club in club_elements:     # append quote text from this page to the quotes list
    clubs.append(club.text)


league_elements = browser.find_elements(By.XPATH, '//td[@data-stat="comp_level"]')

for league in league_elements:     # append quote text from this page to the quotes list
    leagues.append(league.text)


age_elements = browser.find_elements(By.XPATH, '//td[@data-stat="age"]')

for age in age_elements:     # append quote text from this page to the quotes list
    ages.append(age.text)


matches_elements = browser.find_elements(By.XPATH, '//td[@data-stat="games"]')

for match in matches_elements:     # append quote text from this page to the quotes list
    matches_played.append(match.text)


goal_elements = browser.find_elements(By.XPATH, '//td[@data-stat="goals"]')


for goal in goal_elements:     # append quote text from this page to the quotes list
    goals.append(goal.text)


assist_elements = browser.find_elements(By.XPATH, '//td[@data-stat="assists"]')

for assist in assist_elements:     # append quote text from this page to the quotes list
    assists.append(assist.text)


xg_elements = browser.find_elements(By.XPATH, '//td[@data-stat="xg"]')

for xg in xg_elements:     # append quote text from this page to the quotes list
    xG.append(xg.text)



browser.close()



In [3]:
matches_played = matches_played[5:]
goals = goals[5:]
assists = assists[5:]
xG = xG[5:]


print(players)
print(nations)
print(positions)
print(clubs)
print(leagues)
print(ages)
print(matches_played)
print(goals)
print(assists)
print(xG)


print(len(players))
print(len(nations))
print(len(positions))
print(len(clubs))
print(len(leagues))
print(len(ages))
print(len(matches_played))
print(len(goals))
print(len(assists))
print(len(xG))

# convert lists to a pandas dataframe
fbref_df = pd.DataFrame({
    "Player": players,
    "Nationality": nations,
    "Position": positions,
    "Club Team": clubs,
    "League" : leagues,
    "Age" : ages,
    "Matches Played" : matches_played,
    "Goals" : goals,
    "Assists" : assists,
    "Expected Goals" : xG
})

display(fbref_df)

['Max Aarons', 'Brenden Aaronson', 'Paxten Aaronson', 'Keyliane Abdallah', 'Yunis Abdelhamid', 'Salis Abdul Samed', 'Nabil Aberdin', 'Laurent Abergel', 'Matthis Abline', 'Abner', 'Zakaria Aboukhlal', 'Abdel Abqar', 'Tammy Abraham', 'Francesco Acerbi', 'Joshua Acheampong', 'Marcos Acuña', 'Akor Adams', 'Tyler Adams', 'Junior Adamu', 'Sargis Adamyan', 'Tosin Adarabioyo', 'Elijah Adebayo', 'Karim Adeyemi', 'Simon Adingra', 'Nathaniel Adjei', 'Amine Adli', 'Yacine Adli', 'Adryelson', 'Adson', 'Michel Aebischer', 'Emmanuel Agbadou', 'Julen Agirrezabala', 'Lucien Agoume', 'Lucien Agoume', 'Felix Agu', 'Nayef Aguerd', 'Ruben Aguilar', 'Brandon Aguilera', 'Naouirou Ahamada', 'Anel Ahmedhodžić', 'Joseph Aidoo', 'Ola Aina', 'Rayan Aït-Nouri', 'Tosin Aiyegun', 'Kristoffer Ajer', 'Ludovic Ajorque', 'Manuel Akanji', 'Marley Aké', 'Nathan Aké', 'Ilias Akhomach', 'Sergio Akieme', 'Sergio Akieme', 'Ebenezer Akinsanmiro', 'Maghnes Akliouche', 'Paul Akouokou', 'Paul Akouokou', 'Jean-Daniel Akpa-Akpro', 

Unnamed: 0,Player,Nationality,Position,Club Team,League,Age,Matches Played,Goals,Assists,Expected Goals
0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23,20,0,1,0.0
1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22,30,2,2,2.0
2,Paxten Aaronson,us USA,MF,Eint Frankfurt,de Bundesliga,19,7,0,1,0.1
3,Keyliane Abdallah,fr FRA,FW,Marseille,fr Ligue 1,17,1,0,0,0.0
4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35,31,4,0,3.4
...,...,...,...,...,...,...,...,...,...,...
2847,Lovro Zvonarek,hr CRO,"FW,MF",Bayern Munich,de Bundesliga,18,5,1,0,0.1
2848,Martin Ødegaard,no NOR,MF,Arsenal,eng Premier League,24,35,8,10,7.4
2849,Milan Đurić,ba BIH,FW,Hellas Verona,it Serie A,33,20,5,1,4.5
2850,Milan Đurić,ba BIH,FW,Monza,it Serie A,33,17,4,1,3.0


In [4]:
# download as csv
fbref_df.to_csv("fbref_data_raw.csv", header=True, encoding="utf-8")
