In [None]:
import pandas as pd
# Text files holding the scraped player IDs, one file per source/season.
file_paths = [
    'missed1.txt',
    'missed2.txt',
    'players2015.txt',
    'players2016.txt',
    'players2017.txt',
    'players2018.txt',
    'players2019.txt',
    'players2020.txt',
    'players2021.txt',
    'players2022.txt',
]

# Collect every whitespace-separated token from every file, in file order.
strings = []
for file_path in file_paths:
    with open(file_path, 'r') as file:
        strings += file.read().split()

# One row per distinct raw token; the tokens are kept exactly as read.
df = pd.DataFrame({'Numerical Strings': strings}).drop_duplicates()
print(df)

In [None]:
import requests
import json
import time
import pandas as pd

# Download each player's transfer history from the Transfermarkt API in
# rate-limited batches and append the rows to transfers.csv.
#
# Reads:  df['Numerical Strings'] (raw ID tokens built by the loading cell).
# Writes: transfers.csv (appended to after every batch).
# Leaves: errorPlayerIds -- IDs whose request or response failed.

output_path = 'transfers.csv'
transfer_columns = [
    "player_id",
    "season_id",
    "origin_club_id",
    "destination_club_id",
    "market_value",
    "fee",
]

# IDs before this offset are skipped -- presumably they were fetched by an
# earlier run; confirm (or set to 0) before re-running from scratch.
resume_from_index = 3180
batch_size = 20  # Number of requests to be made in each batch
delay_between_batches = 30  # Delay between batches in seconds
delay_between_requests = 1  # Delay after every single request in seconds
request_timeout = 30  # Seconds before a stalled request is abandoned

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}


def fetch_player_transfers(player_id):
    """Fetch the transfer history of one player.

    Returns a list with one dict per transfer (keys as in
    ``transfer_columns``), or ``None`` when the request failed, the server
    did not answer with status 200, or the body was not valid JSON. The
    reason for a failure is printed.
    """
    url = f"https://transfermarkt-api.vercel.app/players/{player_id}/transfers"
    try:
        # Without a timeout a single stalled connection blocks the whole run.
        response = requests.get(url, headers=headers, timeout=request_timeout)
    except requests.exceptions.SSLError:
        print(f"SSLError occurred for player_id: {player_id}")
        return None
    except requests.RequestException:
        print(f"An error occurred for player_id: {player_id}")
        return None
    finally:
        # Throttle after every attempt, successful or not.
        time.sleep(delay_between_requests)

    if response.status_code != 200:
        print(f"Server error for player_id: {player_id}, status code: {response.status_code}")
        return None

    try:
        json_data = response.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; catching the base
        # class also covers other decoder error types response.json() may raise.
        print(f"Error decoding JSON response for player_id: {player_id}")
        return None

    return [
        {
            "player_id": json_data.get("id"),
            "season_id": transfer.get("transferDate"),
            "origin_club_id": transfer.get("oldClubID"),
            "destination_club_id": transfer.get("newClubID"),
            "market_value": transfer.get("marketValue"),
            "fee": transfer.get("fee"),
        }
        # "or []" also tolerates an explicit null history.
        for transfer in (json_data.get("history") or [])
    ]


def append_rows_to_csv(rows, path=output_path):
    """Append ``rows`` (a list of transfer dicts) to the CSV at ``path``.

    The header line is written only when the file is still empty, so the
    file can later be read by column name. Nothing is written for an empty
    ``rows`` list.
    """
    if not rows:
        return
    with open(path, 'a', newline='', encoding='utf-8') as csv_file:
        # A file opened in append mode is positioned at its end, so a
        # position of 0 means the file is new or empty.
        needs_header = csv_file.tell() == 0
        pd.DataFrame(rows, columns=transfer_columns).to_csv(
            csv_file, index=False, header=needs_header
        )


# NOTE(review): duplicates are dropped upstream on the raw tokens, so the same
# ID could still appear twice here if its tokens carried different punctuation.
playerIdList = df['Numerical Strings'].apply(lambda x: int(x.strip("[]',"))).tolist()
playerIdList = playerIdList[resume_from_index:]
data = []  # Rows collected for the batch currently being processed
errorPlayerIds = []
print(len(playerIdList))

batch_starts = range(0, len(playerIdList), batch_size)
for i, batch_index in enumerate(batch_starts, start=1):
    batch_ids = playerIdList[batch_index:batch_index + batch_size]
    print(f"Processing Batch {i}")
    for player_id in batch_ids:
        print(f"Processing Player ID: {player_id}")
        player_rows = fetch_player_transfers(player_id)
        if player_rows is None:
            errorPlayerIds.append(player_id)
            continue
        for transfer_data in player_rows:
            print(i, transfer_data)
        data.extend(player_rows)

    # Persist after every batch so an interrupted run loses at most one batch.
    append_rows_to_csv(data)
    data.clear()  # Clear the data list for the next batch

    if i < len(batch_starts):
        time.sleep(delay_between_batches)  # No need to wait after the last batch

# `df` is deliberately left untouched: every batch has already been written,
# and keeping the ID frame intact lets this cell be re-run.
print("Player IDs with server errors:", errorPlayerIds)


In [None]:
import pandas as pd

# Clean the raw transfer rows in transfers.csv and save the result as
# transfers_modified.csv:
#   * season_id           -> year of the transfer date
#   * player_id / club ids -> prefixed with 'p-' / 'c-'
#   * market_value / fee  -> numeric euro amounts (NaN when not a number)

transfer_columns = [
    "player_id",
    "season_id",
    "origin_club_id",
    "destination_club_id",
    "market_value",
    "fee",
]


def parse_euro_amount(amounts):
    """Convert amount strings such as '€500k' or '€1.5m' to floats.

    The euro sign is removed and the 'k' / 'm' suffixes are rewritten as the
    exponents 'e3' / 'e6' before numeric parsing. Anything that still is not
    a number afterwards ('free transfer', 'loan', '-', '?', missing values)
    becomes NaN.
    """
    # Placeholders are left to errors='coerce' instead of
    # Series.replace([...], None): older pandas releases interpret a None
    # value with a list as "forward-fill", which copies the previous row's
    # amount into the placeholder row.
    text = amounts.astype(str)
    for symbol, replacement in (('€', ''), ('k', 'e3'), ('m', 'e6')):
        text = text.str.replace(symbol, replacement, regex=False)
    return pd.to_numeric(text, errors='coerce')


# Read every column as text with explicit names so the file works with or
# without a header line, and so IDs are not turned into floats ('123.0')
# when a column contains missing values.
df = pd.read_csv('transfers.csv', header=None, names=transfer_columns, dtype=str)
# Drop header line(s) that were read as data.
df = df[df['player_id'] != 'player_id'].reset_index(drop=True)

df['season_id'] = df['season_id'].replace('-', pd.NaT)
df['season_id'] = pd.to_datetime(df['season_id']).dt.year

# The columns are already strings; a missing ID stays missing instead of
# becoming the literal 'p-nan' / 'c-nan'.
df['player_id'] = 'p-' + df['player_id']
df['origin_club_id'] = 'c-' + df['origin_club_id']
df['destination_club_id'] = 'c-' + df['destination_club_id']

df['market_value'] = parse_euro_amount(df['market_value'])
df['fee'] = parse_euro_amount(df['fee'])

df.to_csv('transfers_modified.csv', index=False)

In [None]:
import pandas as pd

# Load the cleaned transfers written by the previous step.
df = pd.read_csv('transfers_modified.csv')

# Prepend a 1-based running number as the first column, named 'id'.
row_count = len(df)
df.insert(loc=0, column='id', value=range(1, row_count + 1))

# Write the numbered table to its own file, without the pandas index.
df.to_csv('output.csv', index=False)