In [6]:
import re
import requests
import pandas as pd

from bs4 import BeautifulSoup

In [47]:
# Scrape the Transfermarkt "market values" listing for the UEFA Champions
# League, page by page, and collect one record per player:
# name, position, age, market value and club name.
# The result is stored in `df` and written to a CSV file.

# URL for the Champions League market values page
BASE_URL = "https://www.transfermarkt.com/uefa-champions-league/marktwerte/pokalwettbewerb/CL"

# Headers to mimic a browser visit (important to avoid blocking)
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.77 Safari/537.36",
}

# Pages to scrape (1-based, inclusive). Adjust LAST_PAGE to the total number
# of pages you want to scrape.
FIRST_PAGE = 1
LAST_PAGE = 8

# Seconds to wait for the server before giving up on a page. Without an
# explicit timeout, requests may wait forever on a stalled connection.
REQUEST_TIMEOUT = 30

# Positions of the fields of interest within the list of <td> cells of a row.
# NOTE(review): these indices mirror the page layout at the time of writing;
# re-check them if the scraped columns start to look shifted.
NAME_CELL = 1
POSITION_CELL = 4
AGE_CELL = 6
CLUB_CELL = 7
MARKET_VALUE_CELL = 8

# Initialize an empty list to store player data from all pages
player_data = []

for page in range(FIRST_PAGE, LAST_PAGE + 1):
    print(f"Scraping page {page}...")  # Output progress

    # Append the page number to the URL
    url = f"{BASE_URL}?page={page}"

    # Fetch the page. Network-level failures (DNS, connection reset, timeout)
    # are treated like a bad status code: report and move on to the next page.
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f"Failed to fetch page {page}! Error: {exc}")
        continue
    if response.status_code != 200:
        print(f"Failed to fetch page {page}! Status code: {response.status_code}")
        continue

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Locate the player table. `find` returns None when nothing matches, so
    # guard both lookups instead of crashing the whole run on one odd page.
    table = soup.find("table", {"class": "items"})
    tbody = table.find("tbody") if table is not None else None
    if tbody is None:
        print(f"No player table found on page {page}; skipping.")
        continue

    for row in tbody.find_all("tr"):
        # Each row contains multiple cells
        cells = row.find_all("td")

        try:
            name = cells[NAME_CELL].find("a").text.strip()  # Player name
            club = cells[CLUB_CELL].find("a").get("title")  # Club name
            position = cells[POSITION_CELL].text.strip()  # Position
            age = cells[AGE_CELL].text.strip()  # Age
            market_value = cells[MARKET_VALUE_CELL].text.strip()  # Market value
        except (IndexError, AttributeError):
            # IndexError: the row has fewer cells than a player row
            # (presumably a nested layout row -- `find_all` is recursive).
            # AttributeError: an expected <a> tag is missing, so `find`
            # returned None. Either way the row is not a usable player
            # record; skip it and continue with the next one.
            continue

        # Append player and club data to list
        player_data.append([name, position, age, market_value, club])

# Convert the data into a DataFrame
columns = ["Name", "Position", "Age", "Market Value", "Club Name"]
df = pd.DataFrame(player_data, columns=columns)

# Save the data to a CSV file
df.to_csv("champions_league_player_club_images.csv", index=False)

# Print the first few rows to verify
print(df.head())


Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
              Name            Position Age Market Value        Club Name
0   Erling Haaland      Centre-Forward  24     €200.00m  Manchester City
1  Vinicius Junior         Left Winger  24     €200.00m      Real Madrid
2  Jude Bellingham  Attacking Midfield  21     €180.00m      Real Madrid
3     Lamine Yamal        Right Winger  17     €180.00m     FC Barcelona
4    Kylian Mbappé      Centre-Forward  26     €160.00m      Real Madrid


In [48]:
# Preview up to the first 50 scraped rows (fewer if the frame is shorter).
df.head(n=50)

Unnamed: 0,Name,Position,Age,Market Value,Club Name
0,Erling Haaland,Centre-Forward,24,€200.00m,Manchester City
1,Vinicius Junior,Left Winger,24,€200.00m,Real Madrid
2,Jude Bellingham,Attacking Midfield,21,€180.00m,Real Madrid
3,Lamine Yamal,Right Winger,17,€180.00m,FC Barcelona
4,Kylian Mbappé,Centre-Forward,26,€160.00m,Real Madrid
5,Bukayo Saka,Right Winger,23,€150.00m,Arsenal FC
6,Phil Foden,Right Winger,24,€140.00m,Manchester City
7,Jamal Musiala,Attacking Midfield,21,€140.00m,Bayern Munich
8,Florian Wirtz,Attacking Midfield,21,€140.00m,Bayer 04 Leverkusen
9,Rodri,Defensive Midfield,28,€130.00m,Manchester City
