In [1]:
!pip install unidecode




[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from unidecode import unidecode
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [3]:
# Start a Chrome session driven by Selenium; the scraping cells below all go through `driver`.
driver = webdriver.Chrome()
# Maximise the browser window before any page is loaded.
driver.maximize_window()

In [4]:
# Load the player index page; the player anchors are read from it further down.
driver.get("https://www.icons.com/players/a-k.html")

In [5]:
# Dismiss the cookie-consent banner if it shows up. This is best effort: any
# exception raised by the wait or by the click is printed and swallowed so the
# notebook can carry on.
accept_cookies_locator = (By.XPATH, "//button[@title='Accept all cookies']")
try:
    # Wait at most 5 seconds for the button to become clickable.
    cookie_button = WebDriverWait(driver, timeout=5).until(
        EC.element_to_be_clickable(accept_cookies_locator)
    )
    cookie_button.click()
    print("Cookies accepted!")
except Exception as e:
    print(f"Could not find or click the cookie button: {e}")

Cookies accepted!


In [6]:
# Every <a> found at li/div/a below an element whose class contains 'products-grid'.
# The next cell reads each anchor's text as the player name and its href as the player URL.
players = driver.find_elements(By.XPATH, "//div[contains(@class, 'products-grid')]//li/div/a")

In [7]:
# Map each player's displayed name to the URL of their page, keeping only the
# players whose last name starts with A, B or C.
player_name_link = {}

for player in players:
    player_name = player.text.strip()
    player_url = player.get_attribute("href")

    # An anchor without visible text would raise IndexError in the surname
    # lookup below, and one without an href would store an unusable link.
    if not player_name or not player_url:
        continue

    # Last whitespace-separated word of the name is treated as the surname.
    surname_initial = player_name.split()[-1][0].upper()
    if surname_initial in 'ABC':
        player_name_link[player_name] = player_url

In [8]:
# Rewrite every stored player URL so it carries a `player_names` query
# parameter built from the player's name (transliterated to ASCII by
# unidecode, spaces turned into "+").
# Only the values of existing keys are replaced, so mutating the dict while
# iterating over it is safe.
for name, url in player_name_link.items():
    # Strip any query string first: re-running this cell would otherwise
    # append a second "?player_names=..." to an already rewritten URL.
    base_url = url.split("?", 1)[0]
    param_name = unidecode(name).replace(" ", "+")
    products_url = f"{base_url}?player_names={param_name}"
    player_name_link[name] = products_url

In [9]:
# For every player: open the filtered listing, press "Load More Items" until
# the progress counter reaches the total counter, then store the product page
# URLs found on the page in final_links[name].
final_links = {}
curr_product_count_xpath = "//p/span[@x-text='productsProgress']"
total_product_count_xpath = "//p/span[@x-text='productsTotal']"
load_more_xpath = "//button[contains(text(), 'Load More Items')]"
product_anchor_xpath = "//div[contains(@class, 'products-grid')]//*[contains(@class, 'product')]/a"
for name, signed_url in player_name_link.items():
    driver.get(signed_url)
    try:
        curr_item_count = int(driver.find_element(By.XPATH, curr_product_count_xpath).text)
        total_item_count = int(driver.find_element(By.XPATH, total_product_count_xpath).text)
    except Exception:
        print(f"skipping player {name} as there are no products to display")
        # Really skip the player. Previously execution fell through with the
        # counters of the previous player (or a NameError for the very first
        # one) and scraped whatever anchors happened to be on the page.
        # The key is still recorded so every player appears in final_links.
        final_links[name] = []
        continue

    # "<" instead of "!=": a progress counter that overshoots the total must
    # not keep the loop clicking forever.
    while curr_item_count < total_item_count:
        driver.find_element(By.XPATH, load_more_xpath).click()

        # Wait (up to 10 s) until the progress counter differs from the value
        # it had before the click.
        WebDriverWait(driver, 10).until(
            lambda d: int(d.find_element(By.XPATH, curr_product_count_xpath).text) != curr_item_count
        )

        curr_item_count = int(driver.find_element(By.XPATH, curr_product_count_xpath).text)
        total_item_count = int(driver.find_element(By.XPATH, total_product_count_xpath).text)

    product_list = driver.find_elements(By.XPATH, product_anchor_xpath)
    # Anchors without an href are dropped (they cannot be fetched later) and
    # dict.fromkeys removes repeated URLs while keeping the page order, so a
    # product matched through more than one anchor is not counted twice.
    product_links = list(dict.fromkeys(
        href
        for href in (product.get_attribute("href") for product in product_list)
        if href
    ))
    final_links[name] = product_links

skipping player Ross Barkley as there are no products to display
skipping player Federico Chiesa as there are no products to display
skipping player Diego Costa as there are no products to display


In [10]:
# quit() ends the whole WebDriver session (all windows plus the driver
# process); close() would only close the current window and leave the
# chromedriver process running.
driver.quit()

In [186]:
# Spot check on a single product page: fetch it and display the text of the
# <p title="Availability"> element.
url = final_links['Trent Alexander-Arnold'][0]
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
page = requests.get(url, timeout=30)
page.raise_for_status()
# 'html' is only a feature name: Beautiful Soup would pick whichever HTML
# parser is installed and warn about the guess. Name the parser explicitly so
# the result does not depend on the environment.
soup = BeautifulSoup(page.text, 'html.parser')

soup.find('p', title='Availability').get_text(strip=True)

'Out of Stock'

In [11]:
# Fetch every collected product page and turn it into one flat record:
# player name, link, title, price and availability, plus one entry per row of
# the "additional-attributes" table (header text -> cell text).
final_table = []
# A single Session lets the HTTP connection be reused across all requests.
with requests.Session() as session:
    for name, product_links in final_links.items():
        for product_link in product_links:
            try:
                page = session.get(product_link, timeout=30)
                page.raise_for_status()
            except requests.RequestException as exc:
                # One unreachable page must not throw away everything that
                # has been scraped so far.
                print(f"skipping {product_link}: {exc}")
                continue
            # Explicit parser: the bare 'html' feature makes Beautiful Soup
            # guess a parser depending on what is installed.
            soup = BeautifulSoup(page.text, 'html.parser')

            price_tag = soup.find('meta', itemprop='price')
            title_tag = soup.find("h1", class_='page-title')
            stock_tag = soup.find('p', title='Availability')

            # A missing tag, missing `content` attribute or non-numeric value
            # becomes NaN instead of aborting the whole run.
            try:
                price = float(price_tag['content'])
            except (TypeError, KeyError, ValueError):
                price = float('nan')
            title = title_tag.get_text(strip=True) if title_tag is not None else None
            stock = stock_tag.get_text(strip=True) if stock_tag is not None else None

            product_data = {
                'player_name': name,
                'product_link': product_link,
                'product_title': title,
                'price': price,
                # Unknown (None) when the page has no availability element.
                'availability': None if stock is None else stock != 'Out of Stock',
            }

            table = soup.find('table', class_='additional-attributes')
            rows = table.find_all('tr') if table is not None else []
            for row in rows:
                header = row.find('th')
                cell = row.find('td')
                if header is None or cell is None:
                    continue
                # Take the text of the whole cell. Iterating over the cell
                # (as before) walks its child nodes one by one, so a cell
                # with several children kept only the last child's text.
                product_data[header.get_text(strip=True)] = cell.get_text(strip=True)
            final_table.append(product_data)

https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-2024-25-home-shirt-in-deluxe-packaging-18636.html
https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-fc-2019-20-home-shirt-in-deluxe-packaging-20740.html
https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-2024-25-home-shirt-in-hero-frame-18638.html
https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-2024-25-home-shirt-in-classic-frame-18637.html
https://www.icons.com/trent-alexander-arnold-official-uefa-champions-league-back-signed-and-framed-liverpool-fc-2024-25-home-shirt-40916.html
https://www.icons.com/trent-alexander-arnold-signed-liverpool-2019-20-home-shirt-in-commentary-chart-frame-1.html
https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-fc-2019-20-home-shirt-in-hero-frame-20742.html
https://www.icons.com/trent-alexander-arnold-back-signed-liverpool-fc-2019-20-home-shirt-in-classic-frame-20741.html
https://www.icons.com/trent-alexander-arnold-front-

In [142]:
# Build the product table from the scraped records, force `price` to a numeric
# dtype (values that cannot be parsed become NaN) and save it as CSV.
df = pd.DataFrame(final_table)
df = df.assign(price=pd.to_numeric(df['price'], errors='coerce'))
df.to_csv(path_or_buf="products.csv", index=False)

In [151]:
# Estimate a per-player "signature worth": pick the (presentation type,
# product type, size) combination that is offered for the largest number of
# distinct players, then average each player's prices within that combination.

# 'Presentation size' where present, otherwise fall back to 'Photo size'.
df['size'] = df[['Presentation size', 'Photo size']].bfill(axis=1).iloc[:, 0]

product_counts = (
    df.groupby(['Presentation type', 'Product type(s)', 'size'])
    .agg(player_count=('player_name', 'nunique'), product_count=('player_name', 'count'))
    .reset_index()
)
if product_counts.empty:
    # Clearer than the out-of-bounds IndexError that .iloc[0] would raise.
    raise ValueError("no product has a presentation type, product type and size to group by")

# product_count is the secondary sort key so that a tie on player_count is
# resolved by an explicit rule rather than by the sort algorithm's ordering.
most_common = product_counts.sort_values(
    by=['player_count', 'product_count'], ascending=False
).iloc[0]

filtered_df = df[
    (df['Presentation type'] == most_common['Presentation type']) &
    (df['Product type(s)'] == most_common['Product type(s)']) &
    (df['size'] == most_common['size'])
]

# Mean price per player within the chosen combination, highest first.
signature_value = (
    filtered_df.groupby('player_name')['price']
    .mean()
    .reset_index()
    .rename(columns={'price': 'signature_worth'})
    .sort_values(by='signature_worth', ascending=False)
)

signature_value.to_csv('signature_worth.csv', index=False)

In [152]:
# Sum of the prices of all of a player's products, one row per player,
# written to portfolio_values.csv.
portfolio_value = df.groupby('player_name', as_index=False)['price'].sum()
portfolio_value.to_csv('portfolio_values.csv', index=False)

In [153]:
# Rows of df whose player has no entry in signature_value; the distinct player
# names among them are written to excluded_players.csv.
excluded_players = df.loc[~df['player_name'].isin(signature_value['player_name'])]
excluded_players.drop_duplicates(subset='player_name')[['player_name']].to_csv(
    'excluded_players.csv', index=False
)

In [167]:
# Alternative signature worth: each player's single highest product price,
# sorted from most to least expensive and written to CSV.
signature_value_on_max_price = (
    df.groupby('player_name')['price']
    .max()
    .reset_index()
    .rename(columns={'price': 'signature_worth'})
    .sort_values(by='signature_worth', ascending=False)
)
signature_value_on_max_price.to_csv('signature_worth_by_max_price.csv', index=False)