# Premier League Prediction Project

In [2]:
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

stats_url = 'https://fbref.com/en/comps/9/Premier-League-Stats'

### Accessing the website through Selenium to get data

In [3]:
options = webdriver.ChromeOptions()
options.add_argument("--headless")
driver = webdriver.Chrome(options = options)

driver.get(stats_url)
time.sleep(6)
html = driver.page_source
driver.quit()

soup = BeautifulSoup(html, "html.parser")
standing_table = soup.select_one("table.stats_table") # we only need the first one among table.stats_table elements

### Example of accessing each team's data

In [4]:

links = [l.get('href') for l in standing_table.find_all('a') if '/squads' in l.get('href')] # parse the team_url from Premier League Standings Table
team_url = [f'https://fbref.com{link}' for link in links]
liverpool_url = team_url[0] # first team (winner) of season 2024-2025
liverpool_url


'https://fbref.com/en/squads/822bd0ba/2024-2025/Liverpool-Stats'

In [5]:
options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options = options)

driver.get(liverpool_url)
time.sleep(6)
liverpool_html = driver.page_source
driver.quit()

liverpool_df = pd.read_html(liverpool_html, match="Scores & Fixtures")[0] # get the team's Scores & Fixtures table and convert it to pandas df
print(liverpool_df.head())

         Date           Time            Comp         Round  Day Venue Result  \
0  2024-08-17  12:30 (20:30)  Premier League   Matchweek 1  Sat  Away      W   
1  2024-08-25  16:30 (00:30)  Premier League   Matchweek 2  Sun  Home      W   
2  2024-09-01  16:00 (00:00)  Premier League   Matchweek 3  Sun  Away      W   
3  2024-09-14  15:00 (23:00)  Premier League   Matchweek 4  Sat  Home      L   
4  2024-09-17  21:00 (04:00)    Champions Lg  League phase  Tue  Away      W   

  GF GA         Opponent   xG  xGA Poss Attendance          Captain Formation  \
0  2  0     Ipswich Town  2.6  0.5   62      30014  Virgil van Dijk   4-2-3-1   
1  2  0        Brentford  2.5  0.5   62      60017  Virgil van Dijk   4-2-3-1   
2  3  0   Manchester Utd  1.8  1.4   47      73738  Virgil van Dijk   4-2-3-1   
3  0  1  Nott'ham Forest  0.9  0.4   68      60344  Virgil van Dijk   4-2-3-1   
4  3  1         it Milan  3.1  0.6   51      59826  Virgil van Dijk   4-2-3-1   

  Opp Formation         Referee 

  liverpool_df = pd.read_html(liverpool_html, match="Scores & Fixtures")[0] # get the team's Scores & Fixtures table and convert it to pandas df


## Getting All Team's Data Using Same Method

In [35]:
stats_url = 'https://fbref.com/en/comps/9/2023-2024/2023-2024-Premier-League-Stats'
seasons = [2024, 2023, 2022] # modified years as 2025 shooting & defense data supports different HTML/JS format
prem_matches = []

options = webdriver.ChromeOptions()
options.add_argument('--headless')
driver = webdriver.Chrome(options = options)

for season in seasons:
    driver.get(stats_url)
    time.sleep(6)
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    standing_table = soup.select_one('table.stats_table')

    links = [l.get('href') for l in standing_table.find_all('a') if '/squads' in l.get('href')] # get all links that's in <a> tag with path '/squads' from standing_table
    team_urls = [f'https://fbref.com{link}' for link in links] # convert it to team page links

    prev = soup.select_one('a.prev').get('href')
    stats_url = f'https://fbref.com{prev}' # set the stats_url link to previous season stats page to access previous season's data on next iteration

    for team_url in team_urls:
        team_name = team_url.split('/')[-1].replace('-Stats', '')
        driver.get(team_url)
        time.sleep(6)
        html = driver.page_source
        scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]

        shooting_url = '/'.join(team_url.split('/')[:-1]) + f'/matchlogs/all_comps/shooting/{team_name}-Match-Logs-All-Competitions'
        # shooting_url: https://fbref.com/en/squads/822bd0ba/2024-2025/matchlogs/all_comps/shooting/Liverpool-Match-Logs-All-Competitions form
        print(shooting_url)

        # check if shooting table exist since some teams are missing this data (ex. Aston Villa)
        try:
            driver.get(shooting_url)
            time.sleep(4)
            shooting_html = driver.page_source
            shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]
            shooting_table.columns = shooting_table.columns.droplevel()
        except:
            print(f'[SKIP] Shooting table for {team_name} - Season {season} does not exist')

        defense_url = '/'.join(team_url.split('/')[:-1]) + f'/matchlogs/all_comps/defense/{team_name}-Match-Logs-All-Competitions'
        # defense_url: https://fbref.com/en/squads/822bd0ba/2024-2025/matchlogs/all_comps/defense/Liverpool-Match-Logs-All-Competitions form
        print(defense_url)

        # check if defense table exist
        try: 
            driver.get(defense_url)
            time.sleep(6)
            defense_html = driver.page_source
            defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
            defense_table.columns = defense_table.columns.droplevel()
        except:
            print(f'[SKIP] Defense table for {team_name} - Season {season} does not exist')


        team_df = (scores_fixtures_table.merge(shooting_table[["Date", "Sh", "SoT", "Dist", "FK", "PK", "xG"]], on = 'Date', how = 'left')
        .merge(defense_table[["Date", "Tkl", "TklW", "Int", "Clr", "Blocks", "Err"]], on = 'Date', how = 'left'))
            
        team_df = team_df[team_df["Comp"] == "Premier League"]
        team_df['Season'] = season
        team_df['Team'] = team_name
        prem_matches.append(team_df)
    
driver.quit()
print(len(prem_matches))


  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b8fd03ef/2023-2024/matchlogs/all_comps/shooting/Manchester-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b8fd03ef/2023-2024/matchlogs/all_comps/defense/Manchester-City-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/18bb7c10/2023-2024/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/18bb7c10/2023-2024/matchlogs/all_comps/defense/Arsenal-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/822bd0ba/2023-2024/matchlogs/all_comps/shooting/Liverpool-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/822bd0ba/2023-2024/matchlogs/all_comps/defense/Liverpool-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8602292d/2023-2024/matchlogs/all_comps/shooting/Aston-Villa-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8602292d/2023-2024/matchlogs/all_comps/defense/Aston-Villa-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/361ca564/2023-2024/matchlogs/all_comps/shooting/Tottenham-Hotspur-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/361ca564/2023-2024/matchlogs/all_comps/defense/Tottenham-Hotspur-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cff3d9bb/2023-2024/matchlogs/all_comps/shooting/Chelsea-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cff3d9bb/2023-2024/matchlogs/all_comps/defense/Chelsea-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b2b47a98/2023-2024/matchlogs/all_comps/shooting/Newcastle-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b2b47a98/2023-2024/matchlogs/all_comps/defense/Newcastle-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/19538871/2023-2024/matchlogs/all_comps/shooting/Manchester-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/19538871/2023-2024/matchlogs/all_comps/defense/Manchester-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/7c21e445/2023-2024/matchlogs/all_comps/shooting/West-Ham-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/7c21e445/2023-2024/matchlogs/all_comps/defense/West-Ham-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/47c64c55/2023-2024/matchlogs/all_comps/shooting/Crystal-Palace-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/47c64c55/2023-2024/matchlogs/all_comps/defense/Crystal-Palace-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d07537b9/2023-2024/matchlogs/all_comps/shooting/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d07537b9/2023-2024/matchlogs/all_comps/defense/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/4ba7cbea/2023-2024/matchlogs/all_comps/shooting/Bournemouth-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/4ba7cbea/2023-2024/matchlogs/all_comps/defense/Bournemouth-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/fd962109/2023-2024/matchlogs/all_comps/shooting/Fulham-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/fd962109/2023-2024/matchlogs/all_comps/defense/Fulham-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8cec06e1/2023-2024/matchlogs/all_comps/shooting/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8cec06e1/2023-2024/matchlogs/all_comps/defense/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d3fd31cc/2023-2024/matchlogs/all_comps/shooting/Everton-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d3fd31cc/2023-2024/matchlogs/all_comps/defense/Everton-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cd051869/2023-2024/matchlogs/all_comps/shooting/Brentford-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cd051869/2023-2024/matchlogs/all_comps/defense/Brentford-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/e4a775cb/2023-2024/matchlogs/all_comps/shooting/Nottingham-Forest-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/e4a775cb/2023-2024/matchlogs/all_comps/defense/Nottingham-Forest-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/e297cd13/2023-2024/matchlogs/all_comps/shooting/Luton-Town-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/e297cd13/2023-2024/matchlogs/all_comps/defense/Luton-Town-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/943e8050/2023-2024/matchlogs/all_comps/shooting/Burnley-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/943e8050/2023-2024/matchlogs/all_comps/defense/Burnley-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/1df6b87e/2023-2024/matchlogs/all_comps/shooting/Sheffield-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/1df6b87e/2023-2024/matchlogs/all_comps/defense/Sheffield-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b8fd03ef/2022-2023/matchlogs/all_comps/shooting/Manchester-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b8fd03ef/2022-2023/matchlogs/all_comps/defense/Manchester-City-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/18bb7c10/2022-2023/matchlogs/all_comps/defense/Arsenal-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/19538871/2022-2023/matchlogs/all_comps/shooting/Manchester-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/19538871/2022-2023/matchlogs/all_comps/defense/Manchester-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b2b47a98/2022-2023/matchlogs/all_comps/shooting/Newcastle-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b2b47a98/2022-2023/matchlogs/all_comps/defense/Newcastle-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/822bd0ba/2022-2023/matchlogs/all_comps/shooting/Liverpool-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/822bd0ba/2022-2023/matchlogs/all_comps/defense/Liverpool-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d07537b9/2022-2023/matchlogs/all_comps/shooting/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d07537b9/2022-2023/matchlogs/all_comps/defense/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8602292d/2022-2023/matchlogs/all_comps/shooting/Aston-Villa-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8602292d/2022-2023/matchlogs/all_comps/defense/Aston-Villa-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/361ca564/2022-2023/matchlogs/all_comps/shooting/Tottenham-Hotspur-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/361ca564/2022-2023/matchlogs/all_comps/defense/Tottenham-Hotspur-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cd051869/2022-2023/matchlogs/all_comps/shooting/Brentford-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cd051869/2022-2023/matchlogs/all_comps/defense/Brentford-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/fd962109/2022-2023/matchlogs/all_comps/shooting/Fulham-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/fd962109/2022-2023/matchlogs/all_comps/defense/Fulham-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/47c64c55/2022-2023/matchlogs/all_comps/shooting/Crystal-Palace-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/47c64c55/2022-2023/matchlogs/all_comps/defense/Crystal-Palace-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cff3d9bb/2022-2023/matchlogs/all_comps/shooting/Chelsea-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cff3d9bb/2022-2023/matchlogs/all_comps/defense/Chelsea-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8cec06e1/2022-2023/matchlogs/all_comps/shooting/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8cec06e1/2022-2023/matchlogs/all_comps/defense/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/7c21e445/2022-2023/matchlogs/all_comps/shooting/West-Ham-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/7c21e445/2022-2023/matchlogs/all_comps/defense/West-Ham-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/4ba7cbea/2022-2023/matchlogs/all_comps/shooting/Bournemouth-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/4ba7cbea/2022-2023/matchlogs/all_comps/defense/Bournemouth-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/e4a775cb/2022-2023/matchlogs/all_comps/shooting/Nottingham-Forest-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/e4a775cb/2022-2023/matchlogs/all_comps/defense/Nottingham-Forest-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d3fd31cc/2022-2023/matchlogs/all_comps/shooting/Everton-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d3fd31cc/2022-2023/matchlogs/all_comps/defense/Everton-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/a2d435b3/2022-2023/matchlogs/all_comps/shooting/Leicester-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/a2d435b3/2022-2023/matchlogs/all_comps/defense/Leicester-City-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/5bfb9659/2022-2023/matchlogs/all_comps/shooting/Leeds-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/5bfb9659/2022-2023/matchlogs/all_comps/defense/Leeds-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/33c895d4/2022-2023/matchlogs/all_comps/shooting/Southampton-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/33c895d4/2022-2023/matchlogs/all_comps/defense/Southampton-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b8fd03ef/2021-2022/matchlogs/all_comps/shooting/Manchester-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b8fd03ef/2021-2022/matchlogs/all_comps/defense/Manchester-City-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/822bd0ba/2021-2022/matchlogs/all_comps/shooting/Liverpool-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/822bd0ba/2021-2022/matchlogs/all_comps/defense/Liverpool-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cff3d9bb/2021-2022/matchlogs/all_comps/shooting/Chelsea-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cff3d9bb/2021-2022/matchlogs/all_comps/defense/Chelsea-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/361ca564/2021-2022/matchlogs/all_comps/shooting/Tottenham-Hotspur-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/361ca564/2021-2022/matchlogs/all_comps/defense/Tottenham-Hotspur-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/18bb7c10/2021-2022/matchlogs/all_comps/shooting/Arsenal-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/18bb7c10/2021-2022/matchlogs/all_comps/defense/Arsenal-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/19538871/2021-2022/matchlogs/all_comps/shooting/Manchester-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/19538871/2021-2022/matchlogs/all_comps/defense/Manchester-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/7c21e445/2021-2022/matchlogs/all_comps/shooting/West-Ham-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/7c21e445/2021-2022/matchlogs/all_comps/defense/West-Ham-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/a2d435b3/2021-2022/matchlogs/all_comps/shooting/Leicester-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/a2d435b3/2021-2022/matchlogs/all_comps/defense/Leicester-City-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d07537b9/2021-2022/matchlogs/all_comps/shooting/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d07537b9/2021-2022/matchlogs/all_comps/defense/Brighton-and-Hove-Albion-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8cec06e1/2021-2022/matchlogs/all_comps/shooting/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8cec06e1/2021-2022/matchlogs/all_comps/defense/Wolverhampton-Wanderers-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/b2b47a98/2021-2022/matchlogs/all_comps/shooting/Newcastle-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/b2b47a98/2021-2022/matchlogs/all_comps/defense/Newcastle-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/47c64c55/2021-2022/matchlogs/all_comps/shooting/Crystal-Palace-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/47c64c55/2021-2022/matchlogs/all_comps/defense/Crystal-Palace-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/cd051869/2021-2022/matchlogs/all_comps/shooting/Brentford-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/cd051869/2021-2022/matchlogs/all_comps/defense/Brentford-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/8602292d/2021-2022/matchlogs/all_comps/shooting/Aston-Villa-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/8602292d/2021-2022/matchlogs/all_comps/defense/Aston-Villa-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/33c895d4/2021-2022/matchlogs/all_comps/shooting/Southampton-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/33c895d4/2021-2022/matchlogs/all_comps/defense/Southampton-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/d3fd31cc/2021-2022/matchlogs/all_comps/shooting/Everton-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/d3fd31cc/2021-2022/matchlogs/all_comps/defense/Everton-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/5bfb9659/2021-2022/matchlogs/all_comps/shooting/Leeds-United-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/5bfb9659/2021-2022/matchlogs/all_comps/defense/Leeds-United-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/943e8050/2021-2022/matchlogs/all_comps/shooting/Burnley-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/943e8050/2021-2022/matchlogs/all_comps/defense/Burnley-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/2abfe087/2021-2022/matchlogs/all_comps/shooting/Watford-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/2abfe087/2021-2022/matchlogs/all_comps/defense/Watford-Match-Logs-All-Competitions


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]
  scores_fixtures_table = pd.read_html(html, match = 'Scores & Fixtures')[0]


https://fbref.com/en/squads/1c781004/2021-2022/matchlogs/all_comps/shooting/Norwich-City-Match-Logs-All-Competitions


  shooting_table = pd.read_html(shooting_html, attrs = {'id': 'matchlogs_for'})[0]


https://fbref.com/en/squads/1c781004/2021-2022/matchlogs/all_comps/defense/Norwich-City-Match-Logs-All-Competitions
60


  defense_table = pd.read_html(defense_html, attrs = {'id': 'matchlogs_for'})[0]


In [36]:
print((len(prem_matches)))

60


In [37]:
prem_df = pd.concat(prem_matches)
prem_df.head()

Unnamed: 0,Date,Time,Comp,Round,Day,Venue,Result,GF,GA,Opponent,...,xG_y,Tkl,Tkl.1,TklW,Int,Clr,Blocks,Err,Season,Team
1,2023-08-11,20:00 (04:00),Premier League,Matchweek 1,Fri,Away,W,3,0,Burnley,...,1.9,17,12,12,4,7,9,2,2024,Manchester-City
3,2023-08-19,20:00 (04:00),Premier League,Matchweek 2,Sat,Home,W,1,0,Newcastle Utd,...,1.0,11,4,7,4,10,9,1,2024,Manchester-City
4,2023-08-27,14:00 (22:00),Premier League,Matchweek 3,Sun,Away,W,2,1,Sheffield Utd,...,3.5,12,7,8,4,6,10,0,2024,Manchester-City
5,2023-09-02,15:00 (23:00),Premier League,Matchweek 4,Sat,Home,W,5,1,Fulham,...,2.2,17,6,10,8,11,2,0,2024,Manchester-City
6,2023-09-16,15:00 (23:00),Premier League,Matchweek 5,Sat,Away,W,3,1,West Ham,...,3.6,11,5,7,7,9,7,0,2024,Manchester-City


In [38]:
prem_df.columns

Index(['Date', 'Time', 'Comp', 'Round', 'Day', 'Venue', 'Result', 'GF', 'GA',
       'Opponent', 'xG_x', 'xGA', 'Poss', 'Attendance', 'Captain', 'Formation',
       'Opp Formation', 'Referee', 'Match Report', 'Notes', 'Sh', 'SoT',
       'Dist', 'FK', 'PK', 'xG_y', 'Tkl', 'Tkl', 'TklW', 'Int', 'Clr',
       'Blocks', 'Err', 'Season', 'Team'],
      dtype='object')

In [39]:
prem_df.groupby('Team')['Date'].count() # count of total games each team has

Team
Arsenal                     114
Aston-Villa                 114
Bournemouth                  76
Brentford                   114
Brighton-and-Hove-Albion    114
Burnley                      76
Chelsea                     114
Crystal-Palace              114
Everton                     114
Fulham                       76
Leeds-United                 76
Leicester-City               76
Liverpool                   114
Luton-Town                   38
Manchester-City             114
Manchester-United           114
Newcastle-United            114
Norwich-City                 38
Nottingham-Forest            76
Sheffield-United             38
Southampton                  76
Tottenham-Hotspur           114
Watford                      38
West-Ham-United             114
Wolverhampton-Wanderers     114
Name: Date, dtype: int64

In [40]:
prem_df.groupby('Team')['Sh'].count() # count of total shooting data each team has

Team
Arsenal                     114
Aston-Villa                 114
Bournemouth                  76
Brentford                   114
Brighton-and-Hove-Albion    114
Burnley                      76
Chelsea                     114
Crystal-Palace              114
Everton                     114
Fulham                       76
Leeds-United                 76
Leicester-City               76
Liverpool                   114
Luton-Town                   38
Manchester-City             114
Manchester-United           114
Newcastle-United            114
Norwich-City                 38
Nottingham-Forest            76
Sheffield-United             38
Southampton                  76
Tottenham-Hotspur           114
Watford                      38
West-Ham-United             114
Wolverhampton-Wanderers     114
Name: Sh, dtype: int64

In [41]:
prem_df.groupby('Team')['Blocks'].count() # count of total defense data each team has

Team
Arsenal                     114
Aston-Villa                 114
Bournemouth                  76
Brentford                   114
Brighton-and-Hove-Albion    114
Burnley                      76
Chelsea                     114
Crystal-Palace              114
Everton                     114
Fulham                       76
Leeds-United                 76
Leicester-City               76
Liverpool                   114
Luton-Town                   38
Manchester-City             114
Manchester-United           114
Newcastle-United            114
Norwich-City                 38
Nottingham-Forest            76
Sheffield-United             38
Southampton                  76
Tottenham-Hotspur           114
Watford                      38
West-Ham-United             114
Wolverhampton-Wanderers     114
Name: Blocks, dtype: int64

In [42]:
prem_df['Team'].value_counts()

Team
Manchester-City             114
West-Ham-United             114
Brentford                   114
Everton                     114
Wolverhampton-Wanderers     114
Arsenal                     114
Brighton-and-Hove-Albion    114
Crystal-Palace              114
Manchester-United           114
Newcastle-United            114
Chelsea                     114
Tottenham-Hotspur           114
Aston-Villa                 114
Liverpool                   114
Burnley                      76
Southampton                  76
Leeds-United                 76
Leicester-City               76
Fulham                       76
Nottingham-Forest            76
Bournemouth                  76
Sheffield-United             38
Luton-Town                   38
Watford                      38
Norwich-City                 38
Name: count, dtype: int64

In [45]:
prem_df[prem_df['Team'] == 'Tottenham-Hotspur'][["Date", "Sh", "SoT", "Dist", "FK", "PK", "xG_y"]].head(38)

Unnamed: 0,Date,Sh,SoT,Dist,FK,PK,xG_y
0,2023-08-13,18,6,19.6,0,0,1.3
1,2023-08-19,17,6,13.8,0,0,1.7
2,2023-08-26,17,6,16.6,1,0,2.1
4,2023-09-02,21,11,19.3,0,0,2.2
5,2023-09-16,28,10,16.4,0,0,2.1
6,2023-09-24,13,5,16.0,0,0,1.4
7,2023-09-30,24,8,19.8,0,0,2.2
8,2023-10-07,15,4,16.9,1,0,1.9
9,2023-10-23,15,5,17.2,0,0,1.5
19,2023-10-27,10,1,19.5,1,0,1.1


## Store the DataFrame to CSV file

In [46]:
prem_df.to_csv('prem_data.csv')