In [None]:
import requests
from bs4 import BeautifulSoup
import csv
from datetime import datetime

# Transfermarkt squad-overview page for each 2024/2025 Premier League club.
# Written as ordered (club name, URL) pairs and turned into a dict, so the
# iteration order of `team_urls` follows the order listed here.
team_urls = dict([
    ('Arsenal', 'https://www.transfermarkt.com/arsenal-fc/startseite/verein/11'),
    ('Aston Villa', 'https://www.transfermarkt.com/aston-villa/startseite/verein/405'),
    ('Bournemouth', 'https://www.transfermarkt.com/afc-bournemouth/startseite/verein/989'),
    ('Brentford', 'https://www.transfermarkt.com/brentford-fc/startseite/verein/1148'),
    ('Brighton & Hove Albion', 'https://www.transfermarkt.com/brighton-amp-hove-albion/startseite/verein/1237'),
    ('Chelsea', 'https://www.transfermarkt.com/chelsea-fc/startseite/verein/631'),
    ('Crystal Palace', 'https://www.transfermarkt.com/crystal-palace/startseite/verein/873'),
    ('Everton', 'https://www.transfermarkt.com/fc-everton/startseite/verein/29'),
    ('Fulham', 'https://www.transfermarkt.com/fulham-fc/startseite/verein/931'),
    ('Ipswich Town', 'https://www.transfermarkt.com/ipswich-town/startseite/verein/677'),
    ('Leicester City', 'https://www.transfermarkt.com/leicester-city/startseite/verein/1003'),
    ('Liverpool', 'https://www.transfermarkt.com/fc-liverpool/startseite/verein/31'),
    ('Manchester City', 'https://www.transfermarkt.com/manchester-city/startseite/verein/281'),
    ('Manchester United', 'https://www.transfermarkt.com/manchester-united/startseite/verein/985'),
    ('Newcastle United', 'https://www.transfermarkt.com/newcastle-united/startseite/verein/762'),
    ('Nottingham Forest', 'https://www.transfermarkt.com/nottingham-forest/startseite/verein/703'),
    ('Southampton', 'https://www.transfermarkt.com/fc-southampton/startseite/verein/180'),
    ('Tottenham Hotspur', 'https://www.transfermarkt.com/tottenham-hotspur/startseite/verein/148'),
    ('West Ham United', 'https://www.transfermarkt.com/west-ham-united/startseite/verein/379'),
    ('Wolverhampton Wanderers', 'https://www.transfermarkt.com/wolverhampton-wanderers/startseite/verein/543'),
])

# CSV file to store player data
csv_file = "premier_league_players_2024.csv"

# Browser-like User-Agent sent with every request. It is identical for all
# clubs, so it is built once instead of once per loop iteration.
headers = {'User-Agent': 'Mozilla/5.0'}

# Scrape every club page in `team_urls` and write one CSV row per player.
# A club whose page cannot be fetched or has no player table is reported and
# skipped; a player row that lacks any expected field is skipped silently.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Header Row
    writer.writerow(["Name", "Nationality","Birthdate", "Age", "Position", "Market Value", "Club"])

    # Iterate over each team URL
    for club, url in team_urls.items():

        # Fetch the team page. The timeout stops a stalled connection from
        # hanging the cell; a network error only costs this one club.
        try:
            response = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as e:
            print(f"Failed to fetch {club}: {e}")
            continue

        # An error page would not contain the squad table, so report it
        # explicitly instead of silently producing no rows for the club.
        if response.status_code != 200:
            print(f"Failed to fetch {club}. Status code: {response.status_code}")
            continue

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the table containing the players
        table = soup.find('table', {'class': 'items'})
        if table is None:
            print(f"No player table found for {club}.")
            continue

        # Iterate over each player row in the table
        for row in table.find_all('tr', {'class': ['odd', 'even']}):
            try:
                # Extract player details
                name = row.select_one("td.hauptlink a").text.strip()
                nationality = row.select_one("td.zentriert img")["title"]
                age_info = row.select_one("td.zentriert:nth-child(3)").text.strip()
                position = row.select_one("tr:nth-child(2)").text.strip()
                market_value = row.select_one("td.rechts.hauptlink").text.strip()

                # age_info is expected to look like "Sep 15, 1995 (29)":
                # the part before " (" is the birthdate, the part after is the age.
                date_and_age = age_info.split(' (')
                birthdate = datetime.strptime(date_and_age[0], "%b %d, %Y").date()
                age = date_and_age[1].replace(')', '')
            except (AttributeError, IndexError, KeyError, TypeError, ValueError):
                # Skip rows with missing or unexpectedly formatted data:
                #   AttributeError - select_one() found nothing, so .text is read on None
                #   TypeError      - select_one() found nothing, so ["title"] indexes None
                #   KeyError       - the flag <img> has no title attribute
                #   ValueError     - the birthdate is not in "%b %d, %Y" format
                #   IndexError     - there is no " (age)" suffix after the birthdate
                continue

            # Write the player details to the CSV file, including the club
            writer.writerow([name, nationality, birthdate, age, position, market_value, club])

print(f"Data has been saved to {csv_file}")


Data has been saved to premier_league_players_2024.csv


In [None]:
# NOTE(review): `google.colab` is presumably only importable inside a Google
# Colab runtime, so this cell will fail elsewhere - confirm before running locally.
from google.colab import files
# Hand the squad CSV to `files.download`; the filename must match the one the
# squad-scraping cell wrote (premier_league_players_2024.csv).
files.download('premier_league_players_2024.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# URL of the top transfers page
url = "https://www.transfermarkt.com/premier-league/toptransfers/wettbewerb/GB1/saison_id/2024/land_id/alle/ausrichtung//spielerposition_id//altersklasse//leihe//w_s/s/zuab/0/art//plus/1"

# CSV file to store transfer data
csv_file = "top_transfers_premier_league_2024.csv"

# Send a request to the page; the timeout keeps a stalled connection from
# hanging the cell indefinitely.
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers, timeout=30)

# Stop this cell if the request failed. An exception is raised instead of
# calling exit(): inside IPython/Jupyter, exit() asks the kernel to shut down,
# which would throw away every variable in the session.
if response.status_code != 200:
    raise RuntimeError(f"Failed to retrieve the page. Status code: {response.status_code}")
print("Successfully fetched the webpage")

# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

# Write one CSV row per transfer found in the page's "items" table. Rows that
# lack the club or league cells are reported and skipped.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(["Player", "Age", "Position", "Transfer Date", "Market Value", "Nationality", "From Club", "To Club","Country of Club", "League", "Transfer Fee"])

    # Find the table containing the transfers
    table = soup.find('table', {'class': 'items'})

    if table is not None:
        print("Table found, scraping data...")
        # Iterate over each transfer row in the table
        for row in table.find_all('tr', {'class': ['odd', 'even']}):
            try:
                # Collect each group of cells once per row instead of
                # re-running find_all for every single field.
                main_cells = row.find_all('td', class_='hauptlink')
                centered_cells = row.find_all('td', class_='zentriert')
                right_cells = row.find_all('td', class_='rechts')

                # Player name is the link in the first "hauptlink" cell;
                # the position is the text of the <td> that follows it.
                player_info = main_cells[0]
                player_name = player_info.find('a').text.strip()
                position = player_info.find_next('td').text.strip()

                # Optional fields fall back to 'N/A' when the row has too few
                # cells (checking only for a non-empty list would still raise).
                age = centered_cells[1].text.strip() if len(centered_cells) > 1 else 'N/A'
                transfer_date = centered_cells[2].text.strip() if len(centered_cells) > 2 else 'N/A'
                market_value = right_cells[0].text.strip() if len(right_cells) > 0 else 'N/A'
                transfer_fee = right_cells[1].text.strip() if len(right_cells) > 1 else 'N/A'

                # Nationality: titles of every flag image in the 4th centered
                # cell, joined with ", " (a player can have several flags).
                nationality = 'N/A'
                if len(centered_cells) > 3:
                    flags = centered_cells[3].find_all('img')
                    nationalities = [flag['title'] for flag in flags if 'title' in flag.attrs]
                    if nationalities:
                        nationality = ', '.join(nationalities)

                # From club: link text of the 2nd "hauptlink" cell
                from_club_link = main_cells[1].find('a')
                from_club_name = from_club_link.text.strip() if from_club_link is not None else 'N/A'

                # To club: link text of the 3rd "hauptlink" cell
                to_club_td = main_cells[2]
                to_club_link = to_club_td.find('a')
                to_club_name = to_club_link.text.strip() if to_club_link is not None else 'N/A'

                # Country of the 'To Club': flag image in the <td> after the to-club cell
                to_country_td = to_club_td.find_next('td')
                to_country_img = to_country_td.find('img', class_='flaggenrahmen') if to_country_td is not None else None
                if to_country_img is not None and 'title' in to_country_img.attrs:
                    to_country = to_country_img['title']
                else:
                    to_country = 'N/A'

                # League: link text inside the 17th <td> of the row (nested
                # cells included). NOTE(review): the index is tied to the
                # current page layout - re-check if the site markup changes.
                league_link = row.find_all('td')[16].find('a')
                league = league_link.text.strip() if league_link is not None else 'N/A'

                # Write the transfer details to the CSV file
                writer.writerow([player_name, age, position, transfer_date, market_value, nationality, from_club_name, to_club_name, to_country, league, transfer_fee])
                print(f"Scraped: {player_name}, {age}, {position}, {transfer_date}, {market_value}, {nationality}, {from_club_name}, {to_club_name}, {to_country}, {league}, {transfer_fee}")
            except Exception as e:
                # Best-effort scrape: report the malformed row and move on.
                print(f"Error scraping row: {e}")
                continue
    else:
        print("No table found on the page.")


Successfully fetched the webpage
Table found, scraping data...
Scraped: Julián Alvarez, 24, Centre-Forward, Aug 12, 2024, €90.00m, Argentina, Italy, Man City, Atlético Madrid, Spain, LaLiga, €75.00m
Scraped: Douglas Luiz, 26, Central Midfield, Jul 1, 2024, €70.00m, Brazil, Aston Villa, Juventus, Italy, Serie A, €51.50m
Scraped: Matthijs de Ligt, 25, Centre-Back, Aug 13, 2024, €65.00m, Netherlands, Bayern Munich, Man Utd, England, Premier League, €45.00m
Scraped: Dominic Solanke, 26, Centre-Forward, Aug 10, 2024, €40.00m, England, Nigeria, Bournemouth, Tottenham, England, Premier League, €64.30m
Scraped: Leny Yoro, 18, Centre-Back, Jul 18, 2024, €50.00m, France, Cote d'Ivoire, LOSC Lille, Man Utd, England, Premier League, €62.00m
Scraped: Pedro Neto, 24, Right Winger, Aug 11, 2024, €55.00m, Portugal, Wolves, Chelsea, England, Premier League, €60.00m
Scraped: Moussa Diaby, 25, Right Winger, Jul 24, 2024, €55.00m, France, Mali, Aston Villa, Al-Ittihad, Saudi Arabia, Saudi Pro League, €60.

In [None]:
import requests
from bs4 import BeautifulSoup
import csv

# Base URL of the top transfers page
base_url = "https://www.transfermarkt.com/premier-league/toptransfers/wettbewerb/GB1/saison_id/2024/land_id/alle/ausrichtung//spielerposition_id//altersklasse//leihe//w_s/s/zuab/0/art//plus/1"

# CSV file to store transfer data
csv_file = "top_transfers_premier_league_2024.csv"

# Browser-like User-Agent sent with every request
headers = {'User-Agent': 'Mozilla/5.0'}

# Result pages 1..last_page (inclusive) are fetched.
last_page = 17

# Seconds to wait for a response before giving up on a page.
request_timeout = 30


def _cell_text(cells, index):
    """Return the stripped text of ``cells[index]``, or 'N/A' if there is no such cell."""
    return cells[index].text.strip() if index < len(cells) else 'N/A'


def _parse_transfer_row(row):
    """Extract one transfer record from a row of the transfers table.

    Args:
        row: BeautifulSoup ``<tr>`` tag taken from the ``items`` table.

    Returns:
        A list of 11 strings in CSV column order: player, age, position,
        transfer date, market value, nationality, from club, to club,
        country of the to-club, league, transfer fee. Optional fields that
        are absent from the row are reported as 'N/A'.

    Raises:
        IndexError: the row has fewer than three ``hauptlink`` cells or fewer
            than 17 ``<td>`` cells.
        AttributeError: the player cell has no link or no following ``<td>``.
    """
    # Collect each group of cells once instead of re-running find_all per field.
    main_cells = row.find_all('td', class_='hauptlink')
    centered_cells = row.find_all('td', class_='zentriert')
    right_cells = row.find_all('td', class_='rechts')

    # Player name is the link in the first "hauptlink" cell; the position is
    # the text of the <td> that follows it.
    player_info = main_cells[0]
    player_name = player_info.find('a').text.strip()
    position = player_info.find_next('td').text.strip()

    age = _cell_text(centered_cells, 1)
    transfer_date = _cell_text(centered_cells, 2)
    market_value = _cell_text(right_cells, 0)
    transfer_fee = _cell_text(right_cells, 1)

    # Nationality: titles of every flag image in the 4th centered cell,
    # joined with ", " (a player can have several flags).
    nationality = 'N/A'
    if len(centered_cells) > 3:
        flags = centered_cells[3].find_all('img')
        nationalities = [flag['title'] for flag in flags if 'title' in flag.attrs]
        if nationalities:
            nationality = ', '.join(nationalities)

    # From club / to club: link text of the 2nd and 3rd "hauptlink" cells.
    from_club_link = main_cells[1].find('a')
    from_club_name = from_club_link.text.strip() if from_club_link is not None else 'N/A'

    to_club_td = main_cells[2]
    to_club_link = to_club_td.find('a')
    to_club_name = to_club_link.text.strip() if to_club_link is not None else 'N/A'

    # Country of the to-club: flag image in the <td> after the to-club cell.
    to_country_td = to_club_td.find_next('td')
    to_country_img = to_country_td.find('img', class_='flaggenrahmen') if to_country_td is not None else None
    if to_country_img is not None and 'title' in to_country_img.attrs:
        to_country = to_country_img['title']
    else:
        to_country = 'N/A'

    # League: link text inside the 17th <td> of the row (nested cells included).
    # NOTE(review): the index is tied to the current page layout - re-check if
    # the site markup changes.
    league_link = row.find_all('td')[16].find('a')
    league = league_link.text.strip() if league_link is not None else 'N/A'

    return [player_name, age, position, transfer_date, market_value, nationality,
            from_club_name, to_club_name, to_country, league, transfer_fee]


# Fetch every result page and append its transfers to a single CSV file.
# A page that cannot be fetched or has no table is reported and skipped, as is
# any individual row that cannot be parsed.
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)

    # Write the header row
    writer.writerow(["Player", "Age", "Position", "Transfer Date", "Market Value", "Nationality", "From Club", "To Club", "Country of Club", "League", "Transfer Fee"])

    for page in range(1, last_page + 1):
        # Construct the URL for each page
        url = f"{base_url}/page/{page}"
        print(f"Fetching page {page}...")

        # A network error or timeout only costs this page, not the whole crawl.
        try:
            response = requests.get(url, headers=headers, timeout=request_timeout)
        except requests.RequestException as e:
            print(f"Failed to retrieve the page {page}: {e}")
            continue

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to retrieve the page {page}. Status code: {response.status_code}")
            continue  # Skip this page and continue with the next

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')

        # Find the table containing the transfers
        table = soup.find('table', {'class': 'items'})
        if table is None:
            print(f"No table found on page {page}.")
            continue

        print("Table found, scraping data...")
        # Iterate over each transfer row in the table
        for row in table.find_all('tr', {'class': ['odd', 'even']}):
            try:
                record = _parse_transfer_row(row)
                writer.writerow(record)
                # Every field is a string, so joining reproduces the comma-separated log line.
                print(f"Scraped: {', '.join(record)}")
            except Exception as e:
                # Best-effort scrape: report the malformed row and move on.
                print(f"Error scraping row: {e}")
                continue

print(f"Data has been saved to {csv_file}")


Fetching page 1...
Table found, scraping data...
Scraped: Julián Alvarez, 24, Centre-Forward, Aug 12, 2024, €90.00m, Argentina, Italy, Man City, Atlético Madrid, Spain, LaLiga, €75.00m
Scraped: Douglas Luiz, 26, Central Midfield, Jul 1, 2024, €70.00m, Brazil, Aston Villa, Juventus, Italy, Serie A, €51.50m
Scraped: Matthijs de Ligt, 25, Centre-Back, Aug 13, 2024, €65.00m, Netherlands, Bayern Munich, Man Utd, England, Premier League, €45.00m
Scraped: Dominic Solanke, 26, Centre-Forward, Aug 10, 2024, €40.00m, England, Nigeria, Bournemouth, Tottenham, England, Premier League, €64.30m
Scraped: Leny Yoro, 18, Centre-Back, Jul 18, 2024, €50.00m, France, Cote d'Ivoire, LOSC Lille, Man Utd, England, Premier League, €62.00m
Scraped: Pedro Neto, 24, Right Winger, Aug 11, 2024, €55.00m, Portugal, Wolves, Chelsea, England, Premier League, €60.00m
Scraped: Moussa Diaby, 25, Right Winger, Jul 24, 2024, €55.00m, France, Mali, Aston Villa, Al-Ittihad, Saudi Arabia, Saudi Pro League, €60.00m
Scraped: A

In [None]:
# NOTE(review): `google.colab` is presumably only importable inside a Google
# Colab runtime, so this cell will fail elsewhere - confirm before running locally.
from google.colab import files
# Hand the transfers CSV to `files.download`; the filename must match the one
# the transfer-scraping cells wrote (top_transfers_premier_league_2024.csv).
files.download('top_transfers_premier_league_2024.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>