In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import random

# Scrape the Borussia Dortmund 2023 squad page on Transfermarkt, visit every
# player's profile and injury-history page, and dump the collected records to
# players_with_injuries.json.

base_url = "https://www.transfermarkt.com"
squad_url = "https://www.transfermarkt.com/borussia-dortmund/kader/verein/16/saison_id/2023/plus/1"

headers = {
    "User-Agent": "Mozilla/5.0"
}

# Seconds to wait for a server response; without a timeout a single stalled
# connection would hang the whole scrape indefinitely.
REQUEST_TIMEOUT = 30


def fetch_soup(url):
    """Download *url* and return it parsed with BeautifulSoup.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response (so error pages are never parsed as data).
    """
    res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return BeautifulSoup(res.content, "html.parser")


def parse_profile_facts(profile_soup):
    """Return Age/Height/Weight/Position read from a parsed profile page.

    Every field defaults to "N/A" when no matching label is found.
    """
    # NOTE(review): the selectors assume facts are marked up as
    # "div.info-table > div" rows holding "info-label"/"info-content" spans;
    # confirm against the live page if every field comes back as "N/A".
    facts = {"Age": "N/A", "Height": "N/A", "Weight": "N/A", "Position": "N/A"}

    for fact in profile_soup.select("div.info-table > div"):
        label = fact.find("span", class_="info-label")
        content = fact.find("span", class_="info-content")
        # A row missing either span carries no usable fact; skip it instead
        # of raising and losing the whole player.
        if label is None or content is None:
            continue

        key = label.get_text(strip=True)
        value = content.get_text(strip=True)

        # First matching field wins, in the order Age, Height, Weight, Position.
        for field in facts:
            if field in key:
                facts[field] = value
                break

    return facts


def parse_injuries(injury_soup):
    """Return the injury rows of a parsed injury page as a list of dicts.

    Each dict has the keys "type", "start", "end" and "days". An empty list
    is returned when the page has no "items" table.
    """
    injury_table = injury_soup.find("table", class_="items")
    if injury_table is None:
        return []

    injuries = []
    for injury_row in injury_table.select("tbody > tr"):
        cols = injury_row.find_all("td")
        # Rows with fewer than five cells do not carry a full injury record.
        if len(cols) < 5:
            continue
        injuries.append({
            "type": cols[1].get_text(strip=True),
            "start": cols[2].get_text(strip=True),
            "end": cols[3].get_text(strip=True),
            "days": cols[4].get_text(strip=True)
        })
    return injuries


response = requests.get(squad_url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly if the squad page could not be fetched; otherwise the script
# would write an empty file and still report success.
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

players_data = []

# Get all player profile links
for row in soup.select("table.items > tbody > tr"):
    # Skip rows that carry a class other than the odd/even player-row classes.
    if "class" in row.attrs and "odd" not in row["class"] and "even" not in row["class"]:
        continue

    # Rows without a linked name cell are not player rows.
    name_cell = row.find("td", class_="hauptlink")
    name_tag = name_cell.a if name_cell is not None else None
    if name_tag is None or not name_tag.get("href"):
        continue

    name = name_tag.get_text(strip=True)
    profile_url = base_url + name_tag["href"]

    try:
        print(f"Scraping {name} -> {profile_url}")
        facts = parse_profile_facts(fetch_soup(profile_url))

        # Swap only the "/profil/" path segment so a player slug that happens
        # to contain "profil" is left intact.
        injury_url = profile_url.replace("/profil/", "/verletzungen/", 1)
        try:
            injuries = parse_injuries(fetch_soup(injury_url))
        except requests.RequestException as e:
            # Keep the profile data even when the injury page is unavailable.
            print(f"Could not fetch injuries for {name}: {e}")
            injuries = []

        players_data.append({
            "Name": name,
            "Age": facts["Age"],
            "Height": facts["Height"],
            "Weight": facts["Weight"],
            "Position": facts["Position"],
            "Injuries": injuries
        })

    except Exception as e:
        # Best-effort per player: report the failure and move on.
        print(f"Error with player {name}: {e}")

    finally:
        # Be nice to the server -- also after a failure, so errors never
        # turn into a burst of back-to-back requests.
        time.sleep(random.uniform(2, 4))

# Convert to DataFrame
df = pd.DataFrame(players_data)
df.to_json("players_with_injuries.json", orient="records", indent=2)

print("Scraping completed! Data saved to players_with_injuries.json.")


Scraping Gregor Kobel -> https://www.transfermarkt.com/gregor-kobel/profil/spieler/257814
Scraping Marcel Lotka -> https://www.transfermarkt.com/marcel-lotka/profil/spieler/453737
Scraping Alexander Meyer -> https://www.transfermarkt.com/alexander-meyer/profil/spieler/76158
Scraping Nico Schlotterbeck -> https://www.transfermarkt.com/nico-schlotterbeck/profil/spieler/388198
Scraping Niklas Süle -> https://www.transfermarkt.com/niklas-sule/profil/spieler/166601
Scraping Mats Hummels -> https://www.transfermarkt.com/mats-hummels/profil/spieler/39728
Scraping Hendry Blank -> https://www.transfermarkt.com/hendry-blank/profil/spieler/804831
Scraping Antonios Papadopoulos -> https://www.transfermarkt.com/antonios-papadopoulos/profil/spieler/482573
Scraping Ian Maatsen -> https://www.transfermarkt.com/ian-maatsen/profil/spieler/485585
Scraping Ramy Bensebaini -> https://www.transfermarkt.com/ramy-bensebaini/profil/spieler/284732
Scraping Guille Bueno -> https://www.transfermarkt.com/guille-bu