### Web Scraping from FBREF

In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

### Scrape with BeautifulSoup

In [4]:
# Download the 2022-2023 Premier League standard-stats page.
# An explicit timeout is passed because requests otherwise waits forever
# when the server accepts the connection but never answers.
resp = requests.get(
    'https://fbref.com/en/comps/9/2022-2023/stats/2022-2023-Premier-League-Stats#all_stats_standard',
    timeout=30,
)
# Shown as the cell output; 200 means the page was served successfully.
resp.status_code

200

In [14]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(markup=resp.text, features='html.parser')

In [38]:
# Collect the text of every cell in the first <tbody>, one list per table row.
tbody = soup.find('tbody')
if tbody is None:
    # Fail with a readable message instead of an IndexError on an empty result.
    raise ValueError("no <tbody> element found in the downloaded page")

rows = []
# Only direct <tr> children are visited, and only their direct <th>/<td> cells,
# so stray text nodes between tags are skipped without a catch-all `except`.
for tr in tbody.find_all('tr', recursive=False):
    row = [
        cell.get_text().replace('\n', '')
        for cell in tr.find_all(['th', 'td'], recursive=False)
    ]
    if row:
        rows.append(row)


### Scrape with Selenium

In [8]:
## Run selenium and chrome driver to scrape data
import os.path
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

## Setup chrome options
chrome_options = Options()
chrome_options.add_argument("--headless") # Ensure GUI is off
chrome_options.add_argument("--no-sandbox")

# Set path to chromedriver as per your configuration
homedir = os.path.expanduser("~")
webdriver_service = Service(f"{homedir}/chromedriver/stable/chromedriver")

# Choose Chrome Browser
browser = webdriver.Chrome(service=webdriver_service, options=chrome_options)

try:
    # Get page
    browser.get("https://fbref.com/en/comps/9/2022-2023/stats/2022-2023-Premier-League-Stats#all_stats_standard")

    # Get Toggle By 90 and click.
    # Wait (up to 15 s) until the button is clickable instead of assuming it
    # is already present the moment the page load returns.
    toggleBy90 = WebDriverWait(browser, 15).until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, 'button[id="stats_squads_standard_for_per_match_toggle"]')
        )
    )
    toggleBy90.click()

    # Snapshot the DOM after the click so the toggled values are captured.
    page_source = BeautifulSoup(browser.page_source, 'html.parser')
finally:
    # Always shut the driver down; otherwise every run of this cell leaves a
    # headless Chrome process behind, even when an earlier step raised.
    browser.quit()

# Get the data in the body
statfb = page_source.select('table[id="stats_squads_standard_for"] tbody tr')

statfb

[<tr data-row="0"><th class="left" data-stat="team" scope="row"><a href="/en/squads/18bb7c10/2022-2023/Arsenal-Stats">Arsenal</a></th><td class="right" data-stat="players_used">26</td><td class="center" data-stat="avg_age">24.7</td><td class="center" data-stat="possession">59.3</td><td class="right group_start" data-stat="games">38</td><td class="right" data-stat="games_starts">418</td><td class="right" csk="3420" data-stat="minutes">3,420</td><td class="right" data-stat="minutes_90s">38.0</td><td class="right group_start modified" data-original-value="84" data-stat="goals">2.21</td><td class="right modified" data-original-value="64" data-stat="assists">1.68</td><td class="right modified" data-original-value="148" data-stat="goals_assists">3.89</td><td class="right modified" data-original-value="81" data-stat="goals_pens">2.13</td><td class="right modified" data-original-value="3" data-stat="pens_made">0.08</td><td class="right modified" data-original-value="4" data-stat="pens_att">0.1

### Scrape with pandas

In [5]:
# read_html returns a list of DataFrames, one per table it parses;
# header=1 takes the column names from the second header row.
df = pd.read_html(
    io='https://fbref.com/en/comps/9/2022-2023/stats/2022-2023-Premier-League-Stats#all_stats_standard',
    header=1,
)

In [6]:
# Keep the first table parsed from the page.
epl = df[0]

# Write it out as a comma-separated UTF-8 file, leaving the index out.
epl.to_csv(
    "../data/epl-22-23.csv",
    index=False,
    sep=',',
    encoding='utf-8',
)
epl

Unnamed: 0,Squad,# Pl,Age,Poss,MP,Starts,Min,90s,Gls,Ast,...,Gls.1,Ast.1,G+A.1,G-PK.1,G+A-PK,xG.1,xAG.1,xG+xAG,npxG.1,npxG+xAG.1
0,Arsenal,26,24.7,59.3,38,418,3420,38.0,84,64,...,2.21,1.68,3.89,2.13,3.82,1.89,1.41,3.31,1.82,3.23
1,Aston Villa,26,27.0,49.3,38,418,3420,38.0,49,35,...,1.29,0.92,2.21,1.21,2.13,1.32,1.02,2.34,1.24,2.26
2,Bournemouth,31,26.3,40.4,38,418,3420,38.0,37,24,...,0.97,0.63,1.61,0.97,1.61,1.02,0.75,1.76,1.02,1.76
3,Brentford,25,26.2,43.8,38,418,3420,38.0,56,36,...,1.47,0.95,2.42,1.29,2.24,1.49,1.02,2.51,1.33,2.35
4,Brighton,29,26.3,60.2,38,418,3420,38.0,68,46,...,1.79,1.21,3.0,1.63,2.84,1.93,1.37,3.3,1.81,3.19
5,Chelsea,32,26.3,58.7,38,418,3420,38.0,37,27,...,0.97,0.71,1.68,0.89,1.61,1.3,1.02,2.32,1.24,2.26
6,Crystal Palace,26,26.7,46.3,38,418,3420,38.0,38,29,...,1.0,0.76,1.76,0.97,1.74,1.03,0.76,1.8,0.97,1.73
7,Everton,28,26.6,42.8,38,418,3420,38.0,32,24,...,0.84,0.63,1.47,0.76,1.39,1.19,0.85,2.04,1.13,1.98
8,Fulham,29,28.2,48.8,38,418,3420,38.0,52,33,...,1.37,0.87,2.24,1.24,2.11,1.22,0.75,1.97,1.03,1.78
9,Leeds United,29,25.3,47.0,38,418,3420,38.0,45,31,...,1.18,0.82,2.0,1.16,1.97,1.25,0.84,2.09,1.18,2.02
