### FIFA 22 WEB SCRAPING OF PLAYER STATS 
#### The stats are scraped from [FUTHEAD](https://www.futhead.com)
![Unsplash image by Cristiano Pinto](img/cristiano-pinto-unsplash.jpg)
<p>Photo by Cristiano Pinto on Unsplash</p>

In [1]:
# Importing necessary libraries

import numpy as np 
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [2]:
def get_player_urls(first_page=1, last_page=209):
    """Collect the url of every player listed on the FUTHEAD FIFA 22 pages.

    The players list is paginated. Each listing page in the requested range
    is downloaded, and the link of every player row found on it is turned
    into an absolute url.

    Parameters
    ----------
    first_page : int, optional
        First listing page to scrape (default 1).
    last_page : int, optional
        Last listing page to scrape, inclusive (default 209, the range this
        notebook was written for).

    Returns
    -------
    player_urls
        A list of urls, one for each player found on the scraped pages.
        Empty when ``last_page`` is smaller than ``first_page``.

    Raises
    ------
    requests.RequestException
        If a listing page cannot be downloaded (including a timeout).
    """
    player_urls = []  # Carries the url of each player's page
    for page in range(first_page, last_page + 1):
        page_url = 'https://www.futhead.com/22/players/?page={}&level=all_nif'.format(page)
        # A timeout keeps a single stalled connection from hanging the whole scrape
        fifa = requests.get(page_url, timeout=30)
        fifaSoup = bs(fifa.text, 'html.parser')
        # Each player_row holds an individual player's information
        for player_row in fifaSoup.find_all(class_='display-block padding-0'):
            player_relative_url = player_row.get('href')
            if not player_relative_url:
                # A matching element without a link cannot be followed; skip it
                continue
            player_urls.append('http://futhead.com' + player_relative_url)
    return player_urls

# Scrape the listing pages once; get_player_stats() later iterates over this list
all_player_urls = get_player_urls() 

In [None]:
# Module-level accumulator: get_player_stats() appends one dictionary per scraped player
all_players_stats = [] # A list of dictionaries that will carry each player's information

def get_player_stats(player_urls=None, results=None):
    """Scrape the stats of each player and append them to a results list.

    Every player page is downloaded and parsed. Pages that do not answer
    with HTTP status 200 are skipped. For the remaining pages one dictionary
    is built and appended to ``results``:

    * ``'id'``   - the last sidebar value on the page
    * ``'name'`` - text of the element with class ``font-16 fh-red``
    * ``'club'`` - the first sidebar value on the page
    * one entry per stat row, keyed by the stat name

    ``'id'``, ``'name'`` and ``'club'`` are only set when the page has
    sidebar values; ``'name'`` is additionally left out when its element is
    missing.

    Parameters
    ----------
    player_urls : iterable of str, optional
        Urls of the player pages to scrape. Defaults to the module-level
        ``all_player_urls``.
    results : list, optional
        List the per-player dictionaries are appended to. Defaults to the
        module-level ``all_players_stats``.

    Returns
    -------
    None
        The scraped data is delivered by appending to ``results``.

    Raises
    ------
    requests.RequestException
        If a player page cannot be downloaded (including a timeout).
    """
    if player_urls is None:
        player_urls = all_player_urls
    if results is None:
        results = all_players_stats

    for player_url in player_urls:
        # A timeout keeps a single stalled connection from hanging the whole scrape
        player = requests.get(player_url, timeout=30)
        if player.status_code != 200:
            continue
        playerSoup = bs(player.text, 'html.parser')

        player_stats_full = {}  # Holds the stats of this individual player

        # Sidebar values with surrounding whitespace stripped
        playerinfo_stripped = [
            trait.text.strip()
            for trait in playerSoup.find_all(class_='col-xs-5 player-sidebar-value')
        ]

        # Filled once per player (not once per sidebar value), and only when
        # the sidebar is present so indexing cannot fail on an empty list.
        if playerinfo_stripped:
            player_stats_full['id'] = playerinfo_stripped[-1]
            name_tag = playerSoup.find(class_='font-16 fh-red')
            if name_tag is not None:
                player_stats_full['name'] = name_tag.text.strip()
            player_stats_full['club'] = playerinfo_stripped[0]

        # Each row carries a stat name followed by its value
        for row in playerSoup.find_all(class_='divided-row player-stat-row sm'):
            row_text = row.text.strip()
            # The value is taken as the last two characters of the row text;
            # everything before it is the stat name.
            stat_name = row_text[:-2].strip()
            stat_value = row_text[-2:]
            try:
                player_stats_full[stat_name] = int(stat_value)
            except ValueError:
                # Not a number: keep the raw text instead of dropping the stat
                player_stats_full[stat_name] = stat_value

        results.append(player_stats_full)

# Run the scrape; the per-player dictionaries are appended to all_players_stats
get_player_stats()

In [None]:
# One row per scraped player dictionary, one column per key
outfield_players_df = pd.DataFrame(data=all_players_stats)
# Use the player id as the row index instead of a running number
outfield_players_df = outfield_players_df.set_index(keys='id')
# Save the table as CSV
outfield_players_df.to_csv(path_or_buf='datasets/outfield_players_stats.csv')

In [None]:
# Bare expression as the last line of the cell: shows the DataFrame as the cell output
outfield_players_df