In [85]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import json
import re

In [77]:
def getSoup(url, timeout=30):
    """Download `url` and parse the response body with BeautifulSoup.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` waits indefinitely, so one stalled connection
            would freeze the whole scrape.

    Returns:
        A BeautifulSoup tree built from the response text with the
        'html.parser' backend.

    Raises:
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx status.
        requests.exceptions.RequestException: connection failure or timeout.
    """
    # A browser-like User-Agent is sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail here with a clear HTTP error instead of parsing an error page and
    # crashing later on a missing element.
    response.raise_for_status()

    return BeautifulSoup(response.text, 'html.parser')

In [78]:
# Scheme and host prepended to the relative hrefs scraped from the pages.
TRANSFERMARKT_URL_HEAD = 'https://www.transfermarkt.us'

# Multipliers that generatePlayers looks up when it turns a market-value
# string into a number. Two different kinds of key share this one table:
#   - currency symbols -> conversion factor ('€' maps to 1, so amounts are
#     presumably normalised to euros; the '$' and '£' rates are hard-coded
#     here -- TODO confirm they are still the intended rates)
#   - magnitude suffixes -> 'm' is 1,000,000 and 'k' is 1,000
CURRENCIES = {
    '€': 1,
    '$': 0.89,
    '£': 1.19,
    'm': 1000000,
    'k': 1000
}

In [79]:
def generateClubs(league_soup):
    """Collect the clubs linked from a league overview page.

    Args:
        league_soup: Parsed league page. Candidate cells are selected with
            ``find_all(class_='hauptlink no-border-links')``.

    Returns:
        A list of dicts, one per club, in page order, with the keys
        'Id' (int taken from the '/verein/<id>/' part of the link),
        'Name' (the anchor's title, or its text when no title is set) and
        'Link' (absolute URL of the club page).

    Cells without a link, or whose link carries no '/verein/<id>/' segment,
    are skipped instead of raising.
    """
    club_id_pattern = re.compile(r'/verein/(\d+)/')
    clubs = []

    for club_link_element in league_soup.find_all(class_='hauptlink no-border-links'):
        anchor = club_link_element.find('a')
        href = anchor.get('href') if anchor is not None else None
        if not href:
            # Cell has no usable link; nothing to record.
            continue

        club_link = TRANSFERMARKT_URL_HEAD + href
        club_id_match = club_id_pattern.search(club_link)
        if club_id_match is None:
            # The link does not point at a club page.
            continue

        clubs.append({
            'Id': int(club_id_match.group(1)),
            'Name': anchor.get('title') or anchor.get_text(strip=True),
            'Link': club_link,
        })

    return clubs

In [80]:
# '<currency symbol><number><magnitude suffix>', e.g. '€25.00m'.
_MARKET_VALUE_PATTERN = re.compile(r'([^\d]*)([\d,.]+)([a-zA-Z]+)')
# Numeric player id embedded in a profile link.
_PLAYER_ID_PATTERN = re.compile(r'/spieler/(\d+)')
# Date layout used for the date of birth and contract expiry, e.g. 'Aug 17, 1993'.
_PLAYER_DATE_FORMAT = '%b %d, %Y'


def _elementText(element):
    """Return the stripped text of `element`, or None when it is missing."""
    return element.text.strip() if element is not None else None


def _findLabelledValue(soup, label):
    """Return the text of the <span> following the <span> whose text is `label`, or None."""
    label_element = soup.find('span', string=label)
    if label_element is None:
        return None
    return _elementText(label_element.find_next('span'))


def _parseDate(raw):
    """Parse the date in front of any '(...)' suffix; None if absent or unparseable."""
    if not raw:
        return None
    try:
        return datetime.strptime(raw.split('(')[0].strip(), _PLAYER_DATE_FORMAT)
    except ValueError:
        return None


def _parseShirtNumber(player_soup):
    """Return the shirt number from the page header as an int, or None."""
    raw = _elementText(player_soup.find(class_='data-header__shirt-number'))
    if raw is None:
        return None
    try:
        return int(raw.replace('#', '').strip())
    except ValueError:
        return None


def _parseMarketValue(player_soup):
    """Return the header market value scaled through CURRENCIES, or None."""
    raw = _elementText(player_soup.find(class_='data-header__market-value-wrapper'))
    if raw is None:
        return None
    # Everything from ' Last' onwards is trailing text, not part of the amount.
    match = _MARKET_VALUE_PATTERN.match(raw.split(' Last')[0].strip())
    if match is None:
        return None
    symbol, number, suffix = match.groups()
    try:
        return CURRENCIES[symbol] * float(number) * CURRENCIES[suffix]
    except (KeyError, ValueError):
        # Unknown symbol/suffix, or a number float() cannot read.
        return None


def _scrapePlayer(player_link, player_id, player_name, club_name):
    """Fetch one player profile page and return its record dict."""
    player_soup = getSoup(player_link)
    full_name = (
        _findLabelledValue(player_soup, 'Full name:')
        or _findLabelledValue(player_soup, 'Name in home country:')
        or player_name
    )
    return {
        'Id': player_id,
        'Name': player_name,
        'Full Name': full_name,
        'Link': player_link,
        'Shirt Number': _parseShirtNumber(player_soup),
        'DOB': _parseDate(_findLabelledValue(player_soup, 'Date of birth/Age:')),
        'Country': _elementText(player_soup.find(itemprop="nationality")),
        'Position': _elementText(player_soup.find(class_="detail-position__position")),
        'Club': club_name,
        'Contract Expires': _parseDate(_findLabelledValue(player_soup, 'Contract expires:')),
        'Market Value': _parseMarketValue(player_soup),
    }


def generatePlayers(clubs):
    """Scrape every player linked from each club page.

    One request is made per club page and one per player profile, so the
    running time grows with the total number of players.

    Args:
        clubs: Iterable of club dicts providing at least 'Link' (club page
            URL) and 'Name', as produced by generateClubs.

    Returns:
        A list of player dicts with the keys 'Id', 'Name', 'Full Name',
        'Link', 'Shirt Number', 'DOB', 'Country', 'Position', 'Club',
        'Contract Expires' and 'Market Value'. 'DOB' and 'Contract Expires'
        are datetime objects. Any field that is missing from the profile or
        cannot be parsed is None, except 'Full Name', which falls back to
        'Name'.

    Cells without a link, or whose link has no '/spieler/<id>' segment, are
    skipped before any profile page is requested. A progress line
    '<club> - <player>' is printed for every player scraped.
    """
    players = []

    for club in clubs:
        club_soup = getSoup(club['Link'])
        # Only elements whose class list is exactly ['hauptlink'] are used.
        player_link_elements = club_soup.find_all(
            lambda tag: tag.has_attr('class') and tag['class'] == ['hauptlink'])

        for player_link_element in player_link_elements:
            anchor = player_link_element.find('a')
            href = anchor.get('href') if anchor is not None else None
            if not href:
                continue

            player_link = TRANSFERMARKT_URL_HEAD + href
            player_id_match = _PLAYER_ID_PATTERN.search(player_link)
            if player_id_match is None:
                continue

            player = _scrapePlayer(
                player_link,
                int(player_id_match.group(1)),
                player_link_element.text.strip(),
                club['Name'],
            )
            players.append(player)
            print(player['Club'], '-', player['Name'])

    return players
        

In [81]:
def generateLeague(league_url):
    """Scrape a league page together with its clubs and their players.

    Args:
        league_url: URL of the league overview page.

    Returns:
        A dict with three keys: 'Name' (stripped text of the page headline),
        'Clubs' (the list returned by generateClubs for this page) and
        'Players' (the list returned by generatePlayers for those clubs).
    """
    page = getSoup(league_url)
    headline = page.find(class_='data-header__headline-wrapper data-header__headline-wrapper--oswald')
    name = headline.text.strip()
    clubs = generateClubs(page)
    players = generatePlayers(clubs)

    return {'Name': name, 'Clubs': clubs, 'Players': players}

In [82]:
# League overview page used as the entry point of the scrape.
PL_URL = 'https://www.transfermarkt.us/premier-league/startseite/wettbewerb/GB1'

# Runs the whole scrape: generateLeague fetches the league page, then
# generatePlayers requests every club page and every player profile page,
# printing one '<club> - <player>' line per player as it goes.
league = generateLeague(PL_URL)

Manchester City - Ederson
Manchester City - Stefan Ortega
Manchester City - Scott Carson
Manchester City - Rúben Dias
Manchester City - Manuel Akanji
Manchester City - Nathan Aké
Manchester City - John Stones
Manchester City - Josko Gvardiol
Manchester City - Josh Wilson-Esbrand
Manchester City - Rico Lewis
Manchester City - Kyle Walker
Manchester City - Rodri
Manchester City - Matheus Nunes
Manchester City - Mateo Kovacic
Manchester City - İlkay Gündoğan
Manchester City - James McAtee
Manchester City - Bernardo Silva
Manchester City - Kevin De Bruyne
Manchester City - Jérémy Doku
Manchester City - Jack Grealish
Manchester City - Phil Foden
Manchester City - Savinho
Manchester City - Oscar Bobb
Manchester City - Erling Haaland
Arsenal FC - David Raya
Arsenal FC - Neto
Arsenal FC - William Saliba
Arsenal FC - Gabriel Magalhães
Arsenal FC - Riccardo Calafiori
Arsenal FC - Jurrien Timber
Arsenal FC - Jakub Kiwior
Arsenal FC - Oleksandr Zinchenko
Arsenal FC - Kieran Tierney
Arsenal FC - Be

In [92]:
# Display the scraped result: a dict with the keys 'Name', 'Clubs' and 'Players'.
league

{'Name': 'Premier League',
 'Clubs': [{'Id': 281,
   'Name': 'Manchester City',
   'Link': 'https://www.transfermarkt.us/manchester-city/startseite/verein/281/saison_id/2024'},
  {'Id': 11,
   'Name': 'Arsenal FC',
   'Link': 'https://www.transfermarkt.us/fc-arsenal/startseite/verein/11/saison_id/2024'},
  {'Id': 631,
   'Name': 'Chelsea FC',
   'Link': 'https://www.transfermarkt.us/fc-chelsea/startseite/verein/631/saison_id/2024'},
  {'Id': 31,
   'Name': 'Liverpool FC',
   'Link': 'https://www.transfermarkt.us/fc-liverpool/startseite/verein/31/saison_id/2024'},
  {'Id': 985,
   'Name': 'Manchester United',
   'Link': 'https://www.transfermarkt.us/manchester-united/startseite/verein/985/saison_id/2024'},
  {'Id': 148,
   'Name': 'Tottenham Hotspur',
   'Link': 'https://www.transfermarkt.us/tottenham-hotspur/startseite/verein/148/saison_id/2024'},
  {'Id': 762,
   'Name': 'Newcastle United',
   'Link': 'https://www.transfermarkt.us/newcastle-united/startseite/verein/762/saison_id/2024'