In [4]:
# Importing necessary libraries

from pathlib import Path
from urllib.parse import urljoin

import numpy as np 
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [5]:
def get_player_urls(first_page=1, last_page=209, timeout=30):
    """Collect the detail-page URL of every player on the futhead listing.

    The player listing is paginated. Each listing page is downloaded and the
    link of every player row found on it is resolved to an absolute URL.
    Pages that cannot be fetched (network error or non-200 response) are
    skipped, which is the same policy get_player_stats applies to player pages.

    Parameters
    ----------
    first_page : int, optional
        First listing page to fetch (default 1).
    last_page : int, optional
        Last listing page to fetch, inclusive (default 209; the listing
        spanned pages 1 to 209 when this notebook was written).
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that page.

    Returns
    -------
    player_urls : list of str
        Absolute URLs of the player pages, in listing order.
    """
    listing_url = 'https://www.futhead.com/22/players/?page={}&level=all_nif'
    player_urls = []
    for page in range(first_page, last_page + 1):
        page_url = listing_url.format(page)
        try:
            fifa = requests.get(page_url, timeout=timeout)
        except requests.RequestException:
            # One unreachable page should not throw away the URLs gathered so far.
            continue
        if fifa.status_code != 200:
            continue
        fifaSoup = bs(fifa.text, 'html.parser')
        # Each matching element is the link wrapping one player row.
        for player_row in fifaSoup.find_all(class_='display-block padding-0'):
            player_relative_url = player_row.get('href')
            if not player_relative_url:
                continue
            # Resolve against the page the link came from, so scheme and host
            # always match the listing instead of a second hard-coded base URL.
            player_urls.append(urljoin(page_url, player_relative_url))
    return player_urls

# Crawl the listing once and keep the player-page URLs; get_player_stats reads this list.
all_player_urls = get_player_urls() 

In [6]:
# Accumulator filled by get_player_stats(): one dictionary of stats per scraped player.
all_players_stats = []

def _split_stat_row(row_text):
    """Split the text of one stat row into a ``(stat_name, stat_value)`` pair."""
    row_text = row_text.strip()
    pieces = row_text.rsplit(None, 1)
    if len(pieces) == 2:
        try:
            # Name and value separated by whitespace: works for any number of digits.
            return pieces[0].strip(), int(pieces[1])
        except ValueError:
            pass  # last token is not a number; use the fixed-width split below
    # Fallback kept from the original parser: treat the last two characters as the value.
    stat_name = row_text[:-2].strip()
    stat_value = row_text[-2:]
    try:
        return stat_name, int(stat_value)
    except ValueError:
        return stat_name, stat_value


def get_player_stats(player_urls=None, timeout=30):
    """Scrape the stats of every player page and append them to all_players_stats.

    For each URL the page is downloaded and parsed into one dictionary holding
    the player's ``id``, ``name`` and ``club`` plus one entry per stat row
    (stat name -> value, stored as ``int`` whenever the value is numeric).
    Pages that cannot be fetched (network error or non-200 response) are skipped.

    Parameters
    ----------
    player_urls : iterable of str, optional
        Player-page URLs to scrape. Defaults to the module-level
        ``all_player_urls`` list.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that page.

    Returns
    -------
    None
        Results are appended to the module-level ``all_players_stats`` list.
    """
    if player_urls is None:
        player_urls = all_player_urls

    for player_url in player_urls:
        try:
            player = requests.get(player_url, timeout=timeout)
        except requests.RequestException:
            # One unreachable page should not abort the whole crawl.
            continue
        if player.status_code != 200:
            continue
        playerSoup = bs(player.text, 'html.parser')

        player_stats_full = {}  # Stats for this individual player

        # Sidebar values with surrounding whitespace removed; the code relies on
        # the first one being the club and the last one being the player id.
        playerinfo_stripped = [
            trait.text.strip()
            for trait in playerSoup.find_all(class_='col-xs-5 player-sidebar-value')
        ]
        if playerinfo_stripped:
            player_stats_full['id'] = playerinfo_stripped[-1]

        # Look the name up once per page and tolerate pages that lack the element.
        name_tag = playerSoup.find(class_='font-16 fh-red')
        if name_tag is not None:
            player_stats_full['name'] = name_tag.text.strip()

        if playerinfo_stripped:
            player_stats_full['club'] = playerinfo_stripped[0]

        # Each stat row carries a stat name followed by its value.
        for stat_row in playerSoup.find_all(class_='divided-row player-stat-row sm'):
            stat_name, stat_value = _split_stat_row(stat_row.text)
            player_stats_full[stat_name] = stat_value

        all_players_stats.append(player_stats_full)

# Run the scraper; it fills all_players_stats in place and returns nothing.
get_player_stats()

In [7]:
# One row per scraped player, indexed by the player id taken from the page.
outfield_players_df = pd.DataFrame(all_players_stats).set_index('id')

# to_csv does not create missing directories, so make sure the target folder
# exists before writing; otherwise the scrape result is lost on a fresh checkout.
output_path = Path('datasets/outfield_players_stats.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
outfield_players_df.to_csv(output_path)

In [8]:
# Show the scraped table as the cell output.
outfield_players_df

Unnamed: 0_level_0,name,club,Acceleration,Sprint Speed,Positioning,Finishing,Shot Power,Long Shots,Volleys,Penalties,...,Composure,Interceptions,Heading,Def. Awareness,Standing Tackle,Sliding Tackle,Jumping,Stamina,Strength,Aggression
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
158023,Lionel Messi,Paris SG,91.0,80.0,93.0,95.0,86.0,94.0,88.0,75.0,...,96.0,40.0,70.0,20.0,35.0,24.0,68.0,72.0,69.0,44.0
188545,Robert Lewandowski,FC Bayern München,77.0,79.0,95.0,95.0,90.0,87.0,89.0,90.0,...,89.0,49.0,90.0,35.0,42.0,19.0,85.0,76.0,86.0,81.0
192985,Kevin De Bruyne,Manchester City,76.0,76.0,88.0,82.0,91.0,91.0,82.0,83.0,...,89.0,66.0,55.0,68.0,65.0,53.0,63.0,89.0,74.0,76.0
190871,Neymar da Silva Santos Jr.,Paris SG,93.0,89.0,86.0,83.0,80.0,81.0,86.0,93.0,...,93.0,37.0,63.0,35.0,32.0,29.0,64.0,81.0,53.0,63.0
231747,Kylian Mbappé,Paris SG,97.0,97.0,92.0,93.0,86.0,82.0,83.0,79.0,...,88.0,38.0,72.0,26.0,34.0,32.0,78.0,88.0,77.0,62.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183711,Jordan Henderson,Liverpool,67.0,66.0,78.0,73.0,77.0,75.0,71.0,64.0,...,82.0,81.0,65.0,79.0,81.0,76.0,78.0,86.0,78.0,83.0
199451,Wissam Ben Yedder,AS Monaco,83.0,81.0,90.0,88.0,84.0,74.0,84.0,85.0,...,85.0,44.0,74.0,34.0,33.0,26.0,78.0,75.0,69.0,62.0
200647,Josip Iličić,Bergamo Calcio,76.0,74.0,85.0,85.0,88.0,89.0,83.0,78.0,...,80.0,46.0,58.0,41.0,32.0,26.0,34.0,65.0,74.0,58.0
184134,Fernando Reges,Sevilla FC,65.0,67.0,59.0,54.0,76.0,68.0,56.0,54.0,...,78.0,85.0,74.0,84.0,85.0,83.0,76.0,82.0,81.0,85.0
