In [1]:
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [58]:
def get_players():
    """Scrape the FBref Big-5 European leagues player table into a DataFrame.

    Each player row of the standard-stats table contributes one record. The
    player, position and team cells are read from the *same* table row, so a
    row with a missing cell is skipped instead of shifting every later
    record out of alignment.

    Returns:
        pandas.DataFrame indexed by ``Name`` with columns:
            * ``Position`` - first position listed for the player
              (e.g. ``"DF"`` for ``"DF,MF"``).
            * ``Team`` - text of the team cell.
            * ``URL`` - site-relative href of the player's profile page.
        The index is not de-duplicated; if the page lists a player more than
        once, the name appears more than once in the index.

    Raises:
        requests.HTTPError: if FBref answers with a 4xx/5xx status (for
            example when rate-limited), rather than silently returning an
            empty table.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    url = "https://fbref.com/en/comps/Big5/stats/players/Big-5-European-Leagues-Stats"

    page = requests.get(url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    records = []
    for player_cell in soup.find_all("td", {"class": "left", "data-stat": "player"}):
        link = player_cell.find("a")
        row = player_cell.find_parent("tr")
        if link is None or row is None:
            # No profile link (or a stray cell outside a row): nothing usable.
            continue

        # Look the sibling cells up inside this row so the three fields
        # always belong to the same player.
        position_cell = row.find("td", {"class": "center", "data-stat": "position"})
        team_cell = row.find("td", {"class": "left", "data-stat": "team"})
        if position_cell is None or team_cell is None:
            continue

        records.append([link.text, position_cell.text, team_cell.text, link.get("href")])

    df = pd.DataFrame(records, columns=["Name", "Position", "Team", "URL"])

    # Multi-position players are listed as "DF,MF"; keep the first one only.
    df["Position"] = df["Position"].str.split(",").str[0]

    return df.set_index("Name")

In [59]:
# Build the player index once (this performs a network request); the cells
# below look players up by name in this frame.
player_database = get_players()

In [80]:
# Sanity check: look up one player by name (the frame is indexed by "Name").
player_database.loc["Liam Cooper"]

Position                                  DF
Team                            Leeds United
URL         /en/players/dc64b8b3/Liam-Cooper
Name: Liam Cooper, dtype: object

In [68]:
def player_stats(player_name, player_database=player_database):
    """Scrape a player's FBref scouting-summary table as numeric data.

    Args:
        player_name: Name to look up in the index of ``player_database``.
        player_database: DataFrame indexed by player name with a ``URL``
            column holding the site-relative profile path. The default is
            the ``player_database`` object that existed when this function
            was defined; re-run this cell after rebuilding the database.

    Returns:
        pandas.DataFrame with one row per statistic. The first column holds
        the statistic name (titled by the table header, ``"Statistic"`` on
        the scouting summary) and the remaining header columns
        (``"Per 90"``, ``"Percentile"``) are floats. Trailing ``%`` signs
        are stripped before conversion. The frame has a 0..n-1 index whose
        ``name`` is ``player_name``.

    Raises:
        KeyError: if ``player_name`` is not in ``player_database``.
        requests.HTTPError: if FBref answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within 30 seconds.
        ValueError: if the page has no scouting-summary table, or a value
            cell cannot be parsed as a number.
    """
    entry = player_database.loc[player_name]
    # A name that occurs more than once in the index makes .loc return a
    # DataFrame instead of a Series; use the first matching row so that a
    # single URL string is formatted below.
    if isinstance(entry, pd.DataFrame):
        entry = entry.iloc[0]

    url = "https://fbref.com{}".format(entry["URL"])

    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')

    # The table id carries a suffix, so match on the common stem.
    table = soup.find('table', {'id': re.compile(r'scout_summary')})
    if table is None:
        raise ValueError(
            "No scouting summary table found for {!r} at {}".format(player_name, url)
        )

    column_titles = [cell.text.strip() for cell in table.find('thead').find_all('th')]
    value_columns = column_titles[1:]

    records = []
    for row in table.find_all('tr'):
        label = row.find('th')
        values = [cell.text.strip() for cell in row.find_all('td')]
        # Skip the header row (no <td>), spacer rows (empty cells) and any
        # row that does not line up with the header.
        if label is None or len(values) != len(value_columns) or not all(values):
            continue
        records.append([label.text.strip()] + values)

    stats = pd.DataFrame(records, columns=column_titles)

    # Percentage stats are rendered like "80.6%"; strip the sign from every
    # value column so new or missing stats never need special-casing.
    for column in value_columns:
        stats[column] = stats[column].str.rstrip("%").astype("float")

    stats.index.name = player_name

    return stats

In [69]:
# Example: fetch the scouting summary for a goalkeeper.
stats = player_stats("Alisson")

In [70]:
# Show the scraped table as the cell output.
stats

Unnamed: 0_level_0,Statistic,Per 90,Percentile
Alisson,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,PSxG-GA,0.2,92.0
1,Goals Against,0.98,85.0
2,Save Percentage,72.7,65.0
3,PSxG/SoT,0.32,91.0
4,Save% (Penalty Kicks),40.0,83.0
5,Clean Sheet Percentage,39.6,85.0
6,Touches,39.85,73.0
7,Launch %,18.4,4.0
8,Goal Kicks,4.75,6.0
9,Avg. Length of Goal Kicks,33.4,17.0


In [24]:
import plotly.express as px

In [95]:
def polar_plots(player_name1, player_name2, tables=False):
    """Overlay two players' percentile profiles on one radar chart.

    Both players' scouting summaries are fetched with ``player_stats`` and
    drawn as filled polar traces on a shared 0-100 radial axis.

    Args:
        player_name1: Name of the first player (drawn first).
        player_name2: Name of the second player.
        tables: When true, also display both underlying stat tables after
            the figure.

    Returns:
        None. The figure (and optionally the tables) are shown as output.
    """
    import plotly.graph_objects as go

    # Fetch both tables before any drawing happens.
    frames = [player_stats(name) for name in (player_name1, player_name2)]

    radar = go.Figure()
    for frame in frames:
        trace = go.Scatterpolar(
            r=frame["Percentile"],
            theta=frame["Statistic"],
            fill='toself',
            name=frame.index.name,
        )
        radar.add_trace(trace)

    # Percentiles live on a fixed 0-100 scale.
    radial_axis = dict(visible=True, range=[0, 100])
    radar.update_layout(polar=dict(radialaxis=radial_axis), showlegend=True)

    radar.show()

    if not tables:
        return
    for frame in frames:
        display(frame)

In [94]:
# Compare two players and also show their stat tables below the chart.
polar_plots("Dejan Kulusevski", "Son Heung-min", tables=True)

Unnamed: 0_level_0,Statistic,Per 90,Percentile
Dejan Kulusevski,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Non-Penalty Goals,0.26,57.0
1,Non-Penalty xG,0.21,46.0
2,Shots Total,1.93,31.0
3,Assists,0.56,96.0
4,xAG,0.27,78.0
5,npxG + xAG,0.47,53.0
6,Shot-Creating Actions,4.29,66.0
7,Passes Attempted,34.69,32.0
8,Pass Completion %,80.6,79.0
9,Progressive Passes,2.44,35.0


Unnamed: 0_level_0,Statistic,Per 90,Percentile
Son Heung-min,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Non-Penalty Goals,0.56,95.0
1,Non-Penalty xG,0.41,91.0
2,Shots Total,2.78,79.0
3,Assists,0.16,31.0
4,xAG,0.24,65.0
5,npxG + xAG,0.65,88.0
6,Shot-Creating Actions,3.91,51.0
7,Passes Attempted,31.62,18.0
8,Pass Completion %,73.4,30.0
9,Progressive Passes,1.79,13.0


In [74]:
# Compare two more players; chart only (tables defaults to False).
polar_plots("Brenden Aaronson", "Azzedine Ounahi")