This notebook reads player names from the "name.csv" file and retrieves each player's statistics. If the player exists on the website 'https://www.fifaratings.com', we collect their statistics. Otherwise, we assign NaN to all statistic columns to signify that the player was not found on the website. The scraped data is stored in a CSV file named "stats.csv".

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import csv
import requests
from bs4 import BeautifulSoup as bs
import re

In [2]:
# reading the names from the csv file and store it in name_list
def read_names_from_csv(csv_file):
    """Return the hyphenated player names found in the second column of a CSV file.

    The first row is treated as a header and skipped. Each name has its
    surrounding whitespace removed and its inner spaces replaced by hyphens,
    which is the form later appended to the site URL.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list of hyphenated names, in file order. Rows that are blank, have
        fewer than two columns, or have an empty name are skipped.
    """
    name_list = []
    # newline='' is what the csv module expects so it can handle line endings
    # (and quoted fields) itself.
    with open(csv_file, 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header; the default avoids StopIteration on an empty file
        for row in reader:
            if len(row) < 2:
                continue  # blank or malformed line: there is no name column to read
            name = row[1].strip()
            if name:
                name_list.append(name.replace(' ', '-'))
    return name_list

In [3]:
# Scrape the values from the website

def scrape_player_data(name_list, timeout=30):
    """Fetch each player's page and collect the text of its rating spans.

    For every name the page ``https://www.fifaratings.com/<name>`` is
    requested and the text of each ``<span class="mr-n1">`` element is
    gathered. A page that yields no such span marks the player as not found.

    Args:
        name_list: Hyphenated player names used as the URL path.
        timeout: Seconds to wait for the server before giving up on a
            request. Without a timeout a stalled connection would block the
            whole run indefinitely.

    Returns:
        A tuple ``(skills_stat, player_not_found)``: ``skills_stat`` holds
        one list of span texts per player that was found, in request order;
        ``player_not_found`` holds the names whose page had no matching span.

    Raises:
        requests.exceptions.RequestException: If a request fails or times out.
    """
    # Browser-like user agent sent with every request; built once, not per player.
    headers = {
        'user-agent': (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0"
        )
    }
    player_not_found = []
    skills_stat = []

    for name in name_list:
        url = f"https://www.fifaratings.com/{name}"
        html_text = requests.get(url, headers=headers, timeout=timeout).text
        soup = bs(html_text, "lxml")

        values = [span.text for span in soup.find_all("span", class_="mr-n1")]
        if values:
            skills_stat.append(values)
        else:
            player_not_found.append(name)
    return skills_stat, player_not_found

In [4]:
# Remove the unnecessary values from skills_stat

def clean_player_data(skills_stat):
    """Trim every player's list of scraped values in place.

    For each inner list, the elements at positions 1, 3, 5, ..., 15 are
    deleted, and then the last remaining element is deleted as well.

    Args:
        skills_stat: List of per-player lists; each inner list is modified
            in place.

    Returns:
        The same ``skills_stat`` object that was passed in.

    Raises:
        IndexError: If an inner list has fewer than 16 elements.
    """
    for row in skills_stat:
        # Walk from the highest position down so that each deletion leaves
        # the positions still to be removed untouched.
        for position in range(15, 0, -2):
            del row[position]
        row.pop()
    return skills_stat

In [5]:
# Combine the player names with their scraped skill values

def combine_data(name_list, skills_stat, player_not_found):
    """Build one row (dict) per player from the names and the scraped values.

    ``skills_stat`` only contains entries for players that were found, in the
    same relative order as ``name_list``. It is therefore consumed
    sequentially rather than indexed by the position in ``name_list``;
    indexing by that position would shift every player after the first
    missing one onto somebody else's values.

    Args:
        name_list: Hyphenated names of every player, in order.
        skills_stat: One list of values per found player, in the same
            relative order as ``name_list``.
        player_not_found: Names that were not found.

    Returns:
        A list with one dict per name. Each dict has a ``"name"`` key (hyphens
        turned back into spaces) and one key per skill column. Players that
        were not found, or for which no values remain in ``skills_stat``, get
        ``np.nan`` in every skill column.

    Raises:
        IndexError: If a found player's list has fewer values than columns.
    """
    columns = ["ATT", "SKI", "MOV", "POW", "MEN", "DEF", "GK"]  # the skills that we want to scrape
    missing = set(player_not_found)  # constant-time membership checks
    found_stats = iter(skills_stat)

    data = []
    for name in name_list:
        # Only found players consume an entry from skills_stat.
        stats = None if name in missing else next(found_stats, None)
        player = {"name": name.replace('-', ' ')}
        for index, column in enumerate(columns):
            player[column] = np.nan if stats is None else stats[index]
        data.append(player)

    return data

In [6]:
def save_to_csv(data, output_csv_file):
    """Write a list of row dicts to a CSV file with a header line.

    The header is the union of the keys of all rows, in first-seen order, so
    an incomplete first row cannot hide columns that later rows carry. Keys
    missing from a row are written as empty fields.

    Args:
        data: Sequence of dicts, one per output row.
        output_csv_file: Path of the CSV file to create or overwrite.

    If ``data`` is empty, or no row has any key, the file is still created
    or truncated but left empty.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order.
    fields = list(dict.fromkeys(key for row in data for key in row))

    with open(output_csv_file, 'w', newline='') as csvfile:
        if not fields:
            return  # nothing to write; the file is left empty
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        writer.writerows(data)

In [7]:
# Input file with the player names and output file for the scraped statistics
csv_file, output_csv_file = 'name.csv', 'stats.csv'

# Step 1: load the hyphenated player names from the input CSV
name_list = read_names_from_csv(csv_file)

# Step 2: scrape the site, keeping track of the names that were not found
skills_stat, player_not_found = scrape_player_data(name_list)

# Step 3: trim the scraped lists (the cleaner edits the lists in place and
# returns the same object, so rebinding the name changes nothing)
skills_stat = clean_player_data(skills_stat)

# Step 4: build one row per player
data = combine_data(name_list, skills_stat, player_not_found)

# Step 5: write the rows to the output CSV
save_to_csv(data, output_csv_file)