In [24]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_transfer_data(season, vereins_id, team_name, heim=True, timeout=30):
    """Scrape one club's arrivals or departures for one season from Transfermarkt.

    Parameters
    ----------
    season : int
        Starting year of the season (``2013`` means 2013/2014). Used as the
        ``saison_id`` in the URL and to build the ``Season`` column.
    vereins_id : int
        Club id placed in the URL.
    team_name : str
        Club name. Lower-cased with spaces replaced by hyphens for the URL,
        and stored unchanged in the ``Team`` column.
    heim : bool, default True
        ``True`` reads the first ``table.items`` on the page and tags rows
        ``"Arrival"``; ``False`` reads the second one and tags rows
        ``"Departure"``.
    timeout : float, default 30
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per transfer with columns ``Player``, ``Team``, ``Position``,
        ``Fee``, ``Season`` and ``Transfer``. An empty DataFrame is returned
        when the request fails with an HTTP error status or the expected
        table is not on the page.
    """
    import warnings  # local import: only used on the HTTP-error path

    # Build the slug outside the f-string: reusing the same quote character
    # inside an f-string expression is a SyntaxError before Python 3.12.
    team_slug = team_name.lower().replace(' ', '-')
    url = f'https://www.transfermarkt.com/{team_slug}/transfers/verein/{vereins_id}/saison_id/{season}'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}

    response = requests.get(url, headers=headers, timeout=timeout)
    if not response.ok:
        # An error page would not contain the transfer tables anyway; make the
        # failure visible instead of silently producing an empty season.
        warnings.warn(f'Request for {url} returned HTTP {response.status_code}; no transfers scraped.')
        return pd.DataFrame()
    soup = BeautifulSoup(response.text, 'html.parser')

    # First "items" table is used for arrivals, second for departures.
    transfer_tables = soup.find_all('table', class_='items')
    table_index = 0 if heim else 1
    if len(transfer_tables) <= table_index:
        return pd.DataFrame()  # return an empty DataFrame if no table found
    transfer_table = transfer_tables[table_index]

    # These values are the same for every row, so compute them once.
    tag = "Arrival" if heim else "Departure"
    season_name = f"{season}/{season + 1}"

    transfers = []
    for row in transfer_table.find_all('tr', class_=['odd', 'even']):
        cols = row.find_all('td')
        if len(cols) <= 3:
            continue

        # The second cell's text is split on ' \n\n\n'; part 1 is used as the
        # player name and part 2 as the position. Missing parts become "".
        player_info = cols[1].text.split(' \n\n\n')
        player_name = player_info[1].strip() if len(player_info) > 1 else ""
        player_position = player_info[2].strip() if len(player_info) > 2 else ""

        # The fee is read from the twelfth <td>; rows with fewer cells get an
        # empty fee rather than raising IndexError.
        transfer_fee = cols[11].text.strip() if len(cols) > 11 else ""

        transfers.append({'Player': player_name, "Team": team_name, 'Position': player_position, 'Fee': transfer_fee, "Season": season_name, "Transfer": tag})

    return pd.DataFrame(transfers)

def scrape_all_seasons(start_season, end_season, vereins_id, team_name, heim=True):
    """Scrape a club's transfers for every season in an inclusive range.

    Calls ``scrape_transfer_data`` once per season from ``start_season`` to
    ``end_season`` (both included) and stacks the results.

    Parameters
    ----------
    start_season, end_season : int
        First and last season start years to scrape.
    vereins_id : int
        Club id forwarded to ``scrape_transfer_data``.
    team_name : str
        Club name forwarded to ``scrape_transfer_data``.
    heim : bool, default True
        Forwarded to ``scrape_transfer_data`` to select arrivals or departures.

    Returns
    -------
    pandas.DataFrame
        All seasons concatenated with a fresh 0..n-1 index, or an empty
        DataFrame when no season produced any rows.
    """
    season_frames = [
        scrape_transfer_data(season, vereins_id, team_name, heim)
        for season in range(start_season, end_season + 1)
    ]

    # Concatenate once instead of growing a frame inside the loop, and leave
    # out empty frames so pd.concat never receives empty entries.
    non_empty_frames = [frame for frame in season_frames if not frame.empty]
    if not non_empty_frames:
        return pd.DataFrame()
    return pd.concat(non_empty_frames, ignore_index=True)

# Collect the arrivals and departures of every club into a single DataFrame.
# Each entry is (club id, club name) as passed to scrape_all_seasons.
clubs = [
    (31, "Liverpool"),
    (985, "Manchester United"),
    (631, "Chelsea"),
    (11, "Arsenal"),
    (148, "Tottenham Hotspur"),
    (281, "Manchester City")
]

# Gather the per-club frames first and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
club_frames = []
for club_id, club_name in clubs:
    # Arrivals (heim=True) first, then departures (heim=False), for each club.
    for heim in (True, False):
        club_transfers = scrape_all_seasons(2013, 2018, club_id, club_name, heim)
        if not club_transfers.empty:
            club_frames.append(club_transfers)

all_transfers = pd.concat(club_frames, ignore_index=True) if club_frames else pd.DataFrame()

# all_transfers now contains the transfer data of all listed clubs for the
# seasons 2013/2014 through 2018/2019

In [25]:
# Print the combined frame as plain text; to_string() renders every row
# without truncation, so the output below is as long as the DataFrame.
print(all_transfers.to_string())

                         Player               Team            Position                      Fee     Season   Transfer
0                 Mamadou Sakho          Liverpool         Centre-Back                  €19.00m  2013/2014    Arrival
1                    Iago Aspas          Liverpool      Centre-Forward                  €10.80m  2013/2014    Arrival
2                Simon Mignolet          Liverpool          Goalkeeper                  €10.60m  2013/2014    Arrival
3                  Luis Alberto          Liverpool    Central Midfield                   €8.00m  2013/2014    Arrival
4                   Tiago Ilori          Liverpool         Centre-Back                   €7.50m  2013/2014    Arrival
5                  Victor Moses          Liverpool      Right Midfield          Loan fee:€1.20m  2013/2014    Arrival
6                  Aly Cissokho          Liverpool         Centre-Back          Loan fee:€1.00m  2013/2014    Arrival
7                    Kolo Touré          Liverpool      