In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re

In [2]:
transfertmarkt_teams_id = {
    'Congo': '3854',
    'Senegal': '3499',
    'Mauritania': '14238',
    'Maroc': '3575',
    'Tanzania': '14666',
    'Burkina Faso': '5872', 
    'Guinea': '3856',
    'Gambia': '6186', 
    'Mali': '3674', 
    'Namibia': '3573', 
    'Algeria': '3614',
    'Nigeria': '3444', 
    'Angola': '3585', 
    'Egypt': '3672',
    'Tunisie': '3670',
    'Equatorial Guinea': '13485',
    'Ghana': '3441',
    'Guinea-Bissau': '3701',
    'Mozambique': '5129',
    'Cameroon': '3434',
    'South Africa': '3806',
    'Ivory Coaste': '3591',
    'Zambia': '3703',
    'Cape Verde': '4311',
    'Ivory Coast': '3591'
}

In [17]:
def download_team_picture(pageSoup, folder_path="team_pictures"):

    # Find the image tag
    image_tag = pageSoup.find('img', class_='flaggenrahmen')
    if not image_tag:
        print("Image not found")
        return

    # Get the image URL
    img_url = image_tag['src']

    # Get the image name (You might want to change this based on your requirements)
    img_name = img_url.split('/')[-1]

    # Ensure the folder exists
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)

    # Download and save the image
    response = requests.get(img_url)
    if response.status_code == 200:
        with open(os.path.join(folder_path, img_name), 'wb') as file:
            file.write(response.content)
        print(f"Image saved as {img_name} in folder {folder_path}")
    else:
        print(f"Failed to download image. Status code: {response.status_code}")

def extract_players_information(pageSoup):
    # Initialize an empty list to store player information
    players_info = []

    # Iterate through each player entry in the HTML
    for row in pageSoup.find_all('tr', {'class': ['even', 'odd']}):
        # Extracting each piece of information
        shirt_number = row.find('div', class_='rn_nummer').text.strip()
        player_link = row.find('a', href=re.compile(r'/profil/spieler/'))
        player_id = re.search(r'/profil/spieler/(\d+)', player_link['href']).group(1)
        player_name = player_link.text.strip()
        age = row.find_all('td')[3].text.strip()  # Adjust index based on HTML structure
        position = row.find_all('td')[4].text.strip()  # Adjust index based on HTML structure
        club = row.find('a', href=re.compile(r'/startseite/verein/'))['title']

        players_info.append({
            'ShirtNumber': shirt_number,
            'PlayerID': player_id,
            'PlayerName': player_name,
            'Age': age,
            'Position': position,
            'Club': club
        })

    return players_info

def scrap_national_team_transfertmarkt(team_name, team_id):
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
    }

    try:
        page = f"https://www.transfermarkt.fr/elfenbeinkuste/kader/verein/{team_id}/saison_id/2023"
        response = requests.get(page, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes

        pageSoup = BeautifulSoup(response.content, 'html.parser')

        #download_team_picture(pageSoup)
        
        players_info = extract_players_information(pageSoup)
        
        return players_info

    except requests.RequestException as e:
        return f"Request error: {e}"
    except ValueError as e:
        return f"Data parsing error: {e}"

In [18]:
# Initialize an empty list to store all player data
all_players_data = []

# Loop over each team in the dictionary
for team_name, team_id in transfertmarkt_teams_id.items():
    # Scrape player data for each team
    team_players = scrap_national_team_transfertmarkt(team_name, team_id)
    
    # Check if the function returned a list (valid data)
    if isinstance(team_players, list):
        all_players_data.extend(team_players)
    else:
        print(f"Error while processing team {team_name}: {team_players}")

    break

# Create a DataFrame from the aggregated data
#df = pd.DataFrame(all_players_data, columns=['ShirtNumber', 'PlayerID', 'PlayerName','Position', 'Age', 'Club'])

In [19]:
pd.DataFrame(all_players_data, columns=['ShirtNumber', 'PlayerID', 'PlayerName','Position', 'Age', 'Club'])

Unnamed: 0,ShirtNumber,PlayerID,PlayerName,Position,Age,Club
0,16,371816,Dimitry Bertaud,Gardien de but,Dimitry Bertaud,Montpellier Hérault SC


In [38]:
"""
# Group by 'Nationality' and count the number of players in each group
team_player_counts = df.groupby('Nationality')['PlayerId'].count()

# Convert the Series back to a DataFrame for better formatting, if desired
team_player_counts_df = team_player_counts.reset_index(name='NumberOfPlayers')

# Display the resulting DataFrame
print(team_player_counts_df)
"""

"\n# Group by 'Nationality' and count the number of players in each group\nteam_player_counts = df.groupby('Nationality')['PlayerId'].count()\n\n# Convert the Series back to a DataFrame for better formatting, if desired\nteam_player_counts_df = team_player_counts.reset_index(name='NumberOfPlayers')\n\n# Display the resulting DataFrame\nprint(team_player_counts_df)\n"