In [2]:
import pandas as pd
import requests as r
from bs4 import BeautifulSoup

# GETTERS 
def get_player_stats(url,id_no):
    soup = BeautifulSoup(r.get(url.format(id_no)).content,'html.parser')
    return soup.find_all("span",{"class":"stat"})

def get_pl_id(name,path_to_file):
    df = pd.read_csv(path_to_file)
    return int(df[df['Name']==name]['id'])
    
# CLEANERS
def clean_player_stats(stats):
    # extract stat names and values from raw html
    names_and_values = []
    for t in stats:
        names_and_values.append(t.text.strip())
    
    # filter out stat titles such as 'Attack', 'Defence', 'Discipline'
    names_and_values = list(filter(lambda x: '\n' in x,names_and_values))

    # split into stat name and value
    names_and_values = list(map(lambda x: x.split('\n'),names_and_values))
    
    # remove whitespace and convert to dict
    stats_dict = {t[0].strip():t[1].strip() for t in names_and_values}

    # convert percentage values to a range between [0,1] & get rid of commas in values
    for key, val in stats_dict.items():
        if '%' in val:
            stats_dict[key] = float(float(val.strip('%'))/100)
        elif ',' in val:
            stats_dict[key] = val.replace(',','')
        else:
            stats_dict[key] = float(val)
    
    return stats_dict

In [3]:
url = "https://www.premierleague.com/players/{}/player/stats"
clean_player_stats(get_player_stats(url,get_pl_id("Jamie Vardy",r"C:\Users\dillo\OneDrive - University of Guelph\DjangoProjects\dashboard\src\static\dataframes\pl_players_info.csv")))

{'Appearances': 201.0,
 'Goals': 97.0,
 'Wins': 81.0,
 'Losses': 73.0,
 'Goals per match': 0.48,
 'Headed goals': 12.0,
 'Goals with right foot': 60.0,
 'Goals with left foot': 25.0,
 'Penalties scored': 17.0,
 'Freekicks scored': 0.0,
 'Shots': 423.0,
 'Shots on target': 211.0,
 'Shooting accuracy %': 0.5,
 'Hit woodwork': 13.0,
 'Big chances missed': 75.0,
 'Assists': 28.0,
 'Passes': '2810',
 'Passes per match': 13.98,
 'Big chances created': 45.0,
 'Crosses': 247.0,
 'Yellow cards': 20.0,
 'Red cards': 3.0,
 'Fouls': 172.0,
 'Offsides': 182.0,
 'Tackles': 119.0,
 'Blocked shots': 69.0,
 'Interceptions': 59.0,
 'Clearances': 80.0,
 'Headed Clearance': 54.0}

In [2]:
url = "https://www.premierleague.com/players/{}/player/stats"
clean_player_page(get_player_page(url,get_pl_id("Enda Stevens",r"C:\Users\dillo\OneDrive - University of Guelph\DjangoProjects\dashboard\src\static\dataframes\pl_players_info.csv")))

{'Appearances': 34.0,
 'Goals': 2.0,
 'Wins': 12.0,
 'Losses': 10.0,
 'Clean sheets': 10.0,
 'Goals conceded': 35.0,
 'Tackles': 77.0,
 'Tackle success %': 0.6,
 'Last man tackles': 0.0,
 'Blocked shots': 4.0,
 'Interceptions': 40.0,
 'Clearances': 71.0,
 'Headed Clearance': 26.0,
 'Clearances off line': 0.0,
 'Recoveries': 188.0,
 'Duels won': 174.0,
 'Duels lost': 155.0,
 'Successful 50/50s': 52.0,
 'Aerial battles won': 28.0,
 'Aerial battles lost': 25.0,
 'Own goals': 0.0,
 'Errors leading to goal': 1.0,
 'Assists': 2.0,
 'Passes': '1202',
 'Passes per match': 35.35,
 'Big chances created': 8.0,
 'Crosses': 100.0,
 'Cross accuracy %': 0.22,
 'Through balls': 2.0,
 'Accurate long balls': 57.0,
 'Yellow cards': 6.0,
 'Red cards': 0.0,
 'Fouls': 37.0,
 'Offsides': 2.0,
 'Headed goals': 1.0,
 'Goals with right foot': 0.0,
 'Goals with left foot': 1.0,
 'Hit woodwork': 0.0}

In [6]:
url = "https://www.premierleague.com/players/{}/player/stats"
clean_player_page(get_player_page(url,get_pl_id("Claudio Bravo",r"C:\Users\dillo\OneDrive - University of Guelph\DjangoProjects\dashboard\src\static\dataframes\pl_players_info.csv")))

{'Appearances': 29.0,
 'Clean sheets': 7.0,
 'Wins': 17.0,
 'Losses': 7.0,
 'Saves': 45.0,
 'Penalties saved': 1.0,
 'Punches': 14.0,
 'High Claims': 14.0,
 'Catches': 9.0,
 'Sweeper clearances': 29.0,
 'Throw outs': 125.0,
 'Goal Kicks': 159.0,
 'Goals conceded': 34.0,
 'Errors leading to goal': 1.0,
 'Own goals': 0.0,
 'Yellow cards': 0.0,
 'Red cards': 0.0,
 'Fouls': 0.0,
 'Goals': 0.0,
 'Assists': 0.0,
 'Passes': 838.0,
 'Passes per match': 28.9,
 'Accurate long balls': 133.0}

In [7]:
url = "https://www.premierleague.com/players/{}/player/stats"
clean_player_page(get_player_page(url,get_pl_id("Wilfred Ndidi",r"C:\Users\dillo\OneDrive - University of Guelph\DjangoProjects\dashboard\src\static\dataframes\pl_players_info.csv")))

{'Appearances': 110.0,
 'Goals': 6.0,
 'Wins': 47.0,
 'Losses': 40.0,
 'Goals per match': 0.05,
 'Headed goals': 1.0,
 'Goals with right foot': 4.0,
 'Goals with left foot': 1.0,
 'Penalties scored': 0.0,
 'Freekicks scored': 0.0,
 'Shots': 145.0,
 'Shots on target': 24.0,
 'Shooting accuracy %': 0.17,
 'Hit woodwork': 5.0,
 'Big chances missed': 4.0,
 'Assists': 5.0,
 'Passes': '5447',
 'Passes per match': 49.52,
 'Big chances created': 3.0,
 'Crosses': 25.0,
 'Cross accuracy %': 0.04,
 'Through balls': 15.0,
 'Accurate long balls': 252.0,
 'Yellow cards': 17.0,
 'Red cards': 2.0,
 'Fouls': 162.0,
 'Offsides': 5.0,
 'Tackles': 436.0,
 'Tackle success %': 0.63,
 'Blocked shots': 39.0,
 'Interceptions': 227.0,
 'Clearances': 274.0,
 'Headed Clearance': 132.0,
 'Recoveries': 918.0,
 'Duels won': 966.0,
 'Duels lost': 787.0,
 'Successful 50/50s': 96.0,
 'Aerial battles won': 366.0,
 'Aerial battles lost': 303.0,
 'Errors leading to goal': 1.0}