In [249]:
import pandas as pd
import numpy as np

import requests
from bs4 import BeautifulSoup

import time

import warnings
warnings.filterwarnings('ignore')

In [8]:
def _to_numeric_or_unchanged(column):
    """Return ``column`` converted by ``pd.to_numeric``; return it untouched if conversion fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # At least one value is not numeric (e.g. names, team labels): keep the text column.
        return column


def fetch_seasonal_player_data(url, table_index=2, timeout=30):
    """Download a stats page and return one of its HTML tables as a DataFrame.

    The page is fetched with ``requests``, HTML comment delimiters are stripped
    (so markup wrapped in comments is parsed like regular markup), and the
    ``<tbody>`` at position ``table_index`` is read row by row.

    Parameters
    ----------
    url : str
        Address of the page to download.
    table_index : int, default 2
        Position of the wanted ``<tbody>`` among all ``<tbody>`` elements on
        the page. The default matches the previously hard-coded value.
    timeout : float, default 30
        Seconds to wait for the server before ``requests`` gives up.

    Returns
    -------
    pandas.DataFrame
        One row per table row that has a non-empty ``player`` value. Column
        names come from the ``data-stat`` attributes of the first row's cells,
        with ``ranker`` renamed to ``player_id``. Columns whose values are all
        numeric are converted to numeric dtypes.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the page has no ``<tbody>`` at ``table_index`` or that body has no rows.
    KeyError
        If the table has no ``player`` column.
    """
    resp = requests.get(url, timeout=timeout)
    # Fail loudly on error responses instead of trying to parse an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'

    # An HTML comment ends with '-->'; remove both delimiters so the commented-out
    # markup becomes part of the parsed tree.
    html = resp.text.replace('<!--', '').replace('-->', '')
    soup = BeautifulSoup(html, 'html.parser')

    table_bodies = soup.find_all('tbody')
    try:
        table_rows = table_bodies[table_index].find_all('tr')
    except IndexError:
        raise IndexError(
            f"Expected a <tbody> at index {table_index} but the page at {url} "
            f"contains {len(table_bodies)}"
        ) from None
    if not table_rows:
        raise IndexError(f"The <tbody> at index {table_index} of {url} has no rows")

    # Column names come from the direct cell children of the first row; restricting
    # to tags skips any whitespace text nodes between cells.
    headers = [
        cell.get('data-stat')
        for cell in table_rows[0].find_all(['th', 'td'], recursive=False)
    ]

    rows = []
    for row in table_rows:
        row_values = []
        for cell in row.find_all('td'):
            # A cell carrying 'data-append-csv' contributes that value *before* its
            # text. Only <td> cells are collected here while the headers include every
            # cell of the first row, so this extra value presumably fills the leading
            # 'ranker' column (renamed to 'player_id' below) - confirm against the page.
            appended_id = cell.get('data-append-csv')
            if appended_id:
                row_values.append(appended_id)
            row_values.append(cell.get_text())
        rows.append(row_values)

    # Rows without <td> cells become all-missing rows; fill them with 0 and drop
    # every row whose 'player' value is that placeholder.
    df = pd.DataFrame(rows, columns=headers).fillna(0)
    df = df.loc[df['player'] != 0].reset_index(drop=True)

    # Remove thousands separators so the minute counts can be parsed as numbers.
    for minutes_col in ('minutes', 'gk_minutes'):
        if minutes_col in df.columns:
            df[minutes_col] = df[minutes_col].str.replace(',', '', regex=False)

    # Ages written as '<years>-<something>' are reduced to the part before the dash;
    # values without a dash (or non-string values) are left as they are.
    if 'age' in df.columns:
        df['age'] = df['age'].map(
            lambda age: age.split('-')[0] if isinstance(age, str) and '-' in age else age
        )

    df = df.rename(columns={'ranker': 'player_id'})
    # Equivalent to the deprecated pd.to_numeric(errors='ignore'), column by column.
    df = df.apply(_to_numeric_or_unchanged)

    return df

In [9]:
# Maps every accepted alias (lower-case, words joined by '-') to (league_code, league_name).
_LEAGUE_ALIASES = {
    alias: league
    for aliases, league in (
        (('england', 'premier-league', 'eng'), (9, "Premier-League")),
        (('spain', 'la-liga', 'esp'), (12, "La-Liga")),
        (('italy', 'serie-a', 'ita'), (11, "Serie-A")),
        (('germany', 'bundesliga', 'ger'), (20, "Bundesliga")),
        (('france', 'ligue-1', 'fra'), (13, "Ligue-1")),
    )
    for alias in aliases
}


def check_comp(comp):
    """Translate a competition alias into its league code and league name.

    Matching is case-insensitive, ignores surrounding whitespace, and treats
    underscores, hyphens and spaces between words as the same separator, so
    ``"premier_league"``, ``"Premier-League"`` and ``"premier league"`` all match.

    Parameters
    ----------
    comp : str
        Country name, league name or three-letter code, e.g. ``"england"``,
        ``"la_liga"`` or ``"ger"``.

    Returns
    -------
    tuple of (int, str)
        The numeric league code and the hyphenated league name.

    Raises
    ------
    ValueError
        If ``comp`` is not one of the known aliases.
    """
    key = comp.strip().lower().replace('_', '-').replace(' ', '-')
    try:
        return _LEAGUE_ALIASES[key]
    except KeyError:
        known = ", ".join(sorted(_LEAGUE_ALIASES))
        raise ValueError(
            f"Unknown competition {comp!r}; expected one of: {known}"
        ) from None

In [10]:
def seasonal_player_data(competition, year, data_type):
    """Fetch one season of player stats for a competition.

    Parameters
    ----------
    competition : str
        Competition alias, resolved by ``check_comp``.
    year : int
        Starting year of the season; the season label is ``"<year>-<year+1>"``.
    data_type : str
        Stats category inserted into the URL path (e.g. ``"misc"``).

    Returns
    -------
    pandas.DataFrame
        Whatever ``fetch_seasonal_player_data`` returns for the built URL.
    """
    league_code, league_name = check_comp(competition)
    season_label = "-".join((str(year), str(year + 1)))
    stats_url = (
        f"https://fbref.com/en/comps/{league_code}/{season_label}/{data_type}/"
        f"{season_label}-{league_name}-Stats"
    )
    return fetch_seasonal_player_data(stats_url)

In [11]:
# Premier League, season starting in 2024, "misc" stats category.
df = seasonal_player_data(competition="eng", year=2024, data_type="misc")

In [12]:
# Preview the first five rows of the fetched table.
df.head(n=5)

Unnamed: 0,player_id,player,nationality,position,team,age,birth_year,minutes_90s,cards_yellow,cards_red,...,interceptions,tackles_won,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,matches
0,774cf58b,Max Aarons,eng ENG,DF,Bournemouth,25,2000,1.0,0,0,...,1,2,0,0,0,7,0,0,,Matches
1,b7e62e1d,Joshua Acheampong,eng ENG,DF,Chelsea,18,2006,1.8,0,0,...,1,1,0,0,0,7,1,6,14.3,Matches
2,2b09d998,Tyler Adams,us USA,MF,Bournemouth,25,1999,7.6,4,0,...,21,27,0,1,0,36,11,5,68.8,Matches
3,c81d773d,Tosin Adarabioyo,eng ENG,DF,Chelsea,27,1997,8.1,2,0,...,3,9,0,0,0,24,19,12,61.3,Matches
4,4dcec659,Simon Adingra,ci CIV,"FW,MF",Brighton,23,2002,6.1,0,0,...,6,6,0,0,0,30,3,1,75.0,Matches


In [13]:
## TODO: Create a dataset that contains all the players.
## TODO: Create a single dataframe holding all the available data for every player.
## TODO: Do the same for the goalkeepers.

## TODO: Team data
## TODO: Match data

## TODO: League standings