In [795]:
import pandas as pd
import requests
import numpy as np
from urllib.parse import quote

In [796]:
# The goal here is to create the Player Table
# Columns: PlayerID, PlayerName, TeamID, LeagueID, Position, MarketValue, Nationality, Age,
#          InternationalCaps, shirtNumber, jersey_name

## Source of data: afcon-2023-final-squads-list.csv
#### Data to extract: PlayerName, Nationality, ShirtNumber, JerseyName

In [797]:
# Load the AFCON 2023 final squad list (the file is read as ISO-8859-1).
squad_list_df = pd.read_csv("C:/Users/guygi/OneDrive/Bureau/concaf_analytics/datasets/afcon-2023-final-squads-list.csv", encoding='ISO-8859-1')

# Rename the source columns to the Player-table names in a single pass.
squad_list_df = squad_list_df.rename(columns={
    'team': 'Nationality',
    'shirt_no.': 'ShirtNumber',
    'name': 'PlayerName',
    'jersey_name': 'JerseyName',
})

# Normalise both name columns: newlines become spaces, surrounding whitespace
# is trimmed and every word is title-cased.
for name_column in ('PlayerName', 'JerseyName'):
    raw_names = squad_list_df[name_column]
    squad_list_df[name_column] = raw_names.replace('\n', ' ', regex=True).str.strip().str.title()

# 'position' and 'birthdate' are not kept from this file
# (presumably because Position and Age are taken from the API data further down).
squad_list_df = squad_list_df.drop(columns=['position', 'birthdate'])

# Function to remove additional spaces between words in a string
def remove_additional_spaces(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is removed as well.

    Args:
        text: The value to clean. Normally a string.

    Returns:
        The cleaned string. A value that is not a string (for example the NaN
        pandas uses for a missing name) is returned unchanged instead of
        raising ``AttributeError``, so the function can be applied to a
        column that contains missing entries.
    """
    if not isinstance(text, str):
        return text
    return ' '.join(text.split())

# Collapse repeated spaces in both name columns and give every squad row a
# sequential PlayerId (0 .. number of rows - 1), appended as the last column.
squad_list_df = squad_list_df.assign(
    PlayerName=squad_list_df['PlayerName'].apply(remove_additional_spaces),
    JerseyName=squad_list_df['JerseyName'].apply(remove_additional_spaces),
    PlayerId=np.arange(len(squad_list_df)),
)

squad_list_df

Unnamed: 0,ShirtNumber,PlayerName,JerseyName,Nationality,PlayerId
0,6,Aaron Tshibola,Tshibola,Congo,0
1,24,Abdallah Dipo Sima,Sima,Senegal,1
2,17,Abdallahi Mahmoud,M. Abdallahi,Mauritania,2
3,18,Abdelkabir Abqar,Abqar,Maroc,3
4,16,Abdessamad Ezzalzouli,Ezzalzouli,Maroc,4
...,...,...,...,...,...
614,2,Zephaniah Phiri,Phiri,Zambia,614
615,26,Zidane Agustini Banjaqui,Zidane Banjaqui,Guinea-Bissau,615
616,24,Zineddine Belaid,Belaid,Algeria,616
617,9,Zinho Gano,Zinho Gano,Guinea-Bissau,617


## Source of data: Transfermarkt API
#### Data to extract: PlayerName (for the join), Position, MarketValue, Club, Age

In [798]:
# Function to search the API for a player by name
def get_player_data(player_name, timeout=30):
    """Search the Transfermarkt API for ``player_name`` (first result page).

    Args:
        player_name: Name to search for. It is percent-encoded before being
            placed in the URL path.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body when the server answers with status 200.
        ``None`` when the request raises a network error, the status code is
        not 200, or the body is not valid JSON; a message is printed in each
        of those cases so the caller can retry the player later.
    """
    formatted_name = quote(player_name)  # Encoding the player name for URL
    url = f"https://transfermarkt-api.vercel.app/players/search/{formatted_name}?page_number=1"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection errors and timeouts are reported like a bad status code
        # instead of aborting the whole collection loop.
        print(f"Failed to retrieve data for {player_name}. Error: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for {player_name}. Status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 answer whose body cannot be decoded is treated as a failure too.
        print(f"Failed to retrieve data for {player_name}. Response body is not valid JSON")
        return None

# Function to get a player's profile from its API id
def get_player_data_with_id(player_name, player_id, timeout=30):
    """Fetch the profile of the player identified by ``player_id``.

    Args:
        player_name: Only used in the failure messages; it is not part of
            the request.
        player_id: Identifier placed in the ``/players/{player_id}/profile``
            URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The decoded JSON body when the server answers with status 200.
        ``None`` when the request raises a network error, the status code is
        not 200, or the body is not valid JSON; a message is printed in each
        of those cases.
    """
    url = f"https://transfermarkt-api.vercel.app/players/{player_id}/profile"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # Connection errors and timeouts are reported like a bad status code.
        print(f"Failed to retrieve data for {player_name}. Error: {error}")
        return None

    if response.status_code != 200:
        print(f"Failed to retrieve data for {player_name}. Status code: {response.status_code}")
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 answer whose body cannot be decoded is treated as a failure too.
        print(f"Failed to retrieve data for {player_name}. Response body is not valid JSON")
        return None


def extract_player_data(data, player_id, player_nationality, player_name=None):
    """Pick one search result and flatten it into a Player-table record.

    Args:
        data: Decoded search response; its ``'results'`` entry is expected to
            be a list of dicts, one per candidate player.
        player_id: Local PlayerId copied into the record.
        player_nationality: Nationality used to choose between several
            candidates: the first candidate whose ``'nationalities'`` list
            contains it is selected.
        player_name: Optional name stored as ``'TransfertMarketPlayerName'``
            when the search returned nothing. Defaults to ``None``.

    Returns:
        A dict with the keys ``PlayerId``, ``TransfertMarketId``,
        ``TransfertMarketPlayerName``, ``Position``, ``ClubName``, ``Age``
        and ``MarketValue``. When there is no result, every field except
        ``PlayerId`` and ``TransfertMarketPlayerName`` is ``None``.
    """
    results = data.get('results') if data else None

    if not results:
        # No result -> the name is probably wrong
        return {
            'PlayerId': player_id,
            'TransfertMarketId': None,
            'TransfertMarketPlayerName': player_name,
            'Position': None,
            'ClubName': None,
            'Age': None,
            'MarketValue': None,
        }

    # NOTE(review): when no candidate matches the nationality, the LAST
    # candidate is used. This keeps the behaviour of the original loop;
    # confirm whether the first hit would be the better fallback.
    player_info = results[-1]
    for candidate in results:
        # 'nationalities' can be missing or None; treat both as "no nationality".
        if player_nationality in (candidate.get('nationalities') or []):
            player_info = candidate
            break

    # 'club' can be missing or None for a candidate.
    club = player_info.get('club')

    return {  # extract results of the request response
        'PlayerId': player_id,
        'TransfertMarketId': player_info.get('id', None),
        'TransfertMarketPlayerName': player_info.get('name', None),
        'Position': player_info.get('position', None),
        'ClubName': club.get('name', None) if club else None,
        'Age': player_info.get('age', None),
        'MarketValue': player_info.get('marketValue', None),
    }

In [799]:
def loop_request_transfert_market_api(squad_list_df, initial_df):
    """Query the API for every player in ``squad_list_df`` and collect the results.

    Args:
        squad_list_df: Frame with the columns ``PlayerName``, ``JerseyName``,
            ``PlayerId`` and ``Nationality``; one request is made per row.
        initial_df: Results collected by a previous pass (may be an empty
            frame). The newly found players are appended to it.

    Returns:
        A tuple ``(output_df, players_not_found_df)``:

        * ``output_df`` is ``initial_df`` followed by one row per player for
          which a result with a ``TransfertMarketId`` was extracted. Row
          labels are renumbered so that a retry pass does not produce
          duplicate index values.
        * ``players_not_found_df`` holds the rows whose request returned no
          data, with the same four columns as the input, so it can be passed
          back in as ``squad_list_df`` for a retry.

        Players for which the request succeeded but no result was found are
        only reported with a printed message; they appear in neither frame.
    """
    # Collecting data for each player
    all_players_data = []
    players_not_found = []
    # One dict per row: each row is read once instead of being rebuilt with
    # .iloc for every column.
    for row in squad_list_df.to_dict('records'):
        player_name = row['PlayerName']
        jersey_name = row['JerseyName']
        player_id = row['PlayerId']
        player_nationality = row['Nationality']

        data = get_player_data(player_name)
        if not data:
            # The request failed (get_player_data returned nothing):
            # keep the row so that it can be retried.
            players_not_found.append({'PlayerName': player_name, 'PlayerId': player_id, 'JerseyName': jersey_name, 'Nationality': player_nationality})
            continue

        extracted_results = extract_player_data(data, player_id, player_nationality)
        if extracted_results.get('TransfertMarketId', None):
            # we have the data with the player name
            all_players_data.append(extracted_results)
        else:
            # the player name was not found
            print(f"No result found for player : {player_name}")

    output_df = pd.concat([initial_df, pd.DataFrame(all_players_data)], ignore_index=True)

    return output_df, pd.DataFrame(players_not_found)

In [800]:
# First pass: query every player of the squad list, starting from an empty result table.
all_players_data_df, players_not_found_df = loop_request_transfert_market_api(
    squad_list_df=squad_list_df,
    initial_df=pd.DataFrame(),
)

Failed to retrieve data for Amar Sharaf Eldin. Status code: 504
Failed to retrieve data for Edgar Miguel Ie. Status code: 504
Failed to retrieve data for El-Hadji Ba. Status code: 504
Failed to retrieve data for Stephane Aziz Ki. Status code: 504


In [802]:
# Row counts, one per line: squad list, players collected so far, failed requests.
print(
    squad_list_df.shape[0],
    all_players_data_df.shape[0],
    players_not_found_df.shape[0],
    sep='\n',
)

619
615
4


In [803]:
# Retry pass: query only the players whose request failed and append the new
# rows to the results already collected.
all_players_data_df, players_not_found_df = loop_request_transfert_market_api(
    squad_list_df=players_not_found_df,
    initial_df=all_players_data_df,
)

In [804]:
# Row counts after the retry, one per line: squad list, players collected, failed requests.
print(
    squad_list_df.shape[0],
    all_players_data_df.shape[0],
    players_not_found_df.shape[0],
    sep='\n',
)

619
619
0


In [805]:
# Function to convert market value string to integer
def convert_market_value(value):
    """Convert a market-value label such as ``'€8.20m'`` or ``'€500k'`` to an integer.

    Args:
        value: The label to convert. The euro sign is ignored and the text is
            case-insensitive. A ``k`` multiplies the number by 1,000 and an
            ``m`` by 1,000,000; a label containing ``-`` (and no ``k``/``m``)
            means 0.

    Returns:
        The value as an ``int``, or ``None`` when ``value`` is missing
        (``None``/NaN) or an empty string. A value that is already numeric is
        returned as an ``int``, so re-applying the function to converted data
        does not fail.

    Raises:
        ValueError: If the text that remains is not a number.
    """
    if pd.isna(value):
        return None
    if not isinstance(value, str):
        # Already numeric, e.g. the conversion was applied a second time.
        return int(value)

    text = value.replace('€', '').strip().lower()
    if not text:
        return None

    for suffix, multiplier in (('k', 1_000), ('m', 1_000_000)):
        if suffix in text:
            # round() instead of int(): float('8.20') * 1_000_000 evaluates
            # slightly below 8200000 and int() would truncate it to 8199999.
            return round(float(text.replace(suffix, '')) * multiplier)

    if '-' in text:
        return 0
    return int(text)

In [863]:
# Attach the API data to every squad row (left join on PlayerId, so squad rows
# without API data are kept).
players_df = pd.merge(squad_list_df, all_players_data_df, how='left', on='PlayerId')
# Turn the market-value labels into integers.
players_df['MarketValue'] = players_df['MarketValue'].apply(convert_market_value)

# One row per distinct ClubName value, numbered from 0 in order of first appearance.
club_players_df = pd.DataFrame({'ClubName': players_df['ClubName'].unique()})
club_players_df['ClubId'] = np.arange(len(club_players_df))

In [864]:
# Club reference table (read as ISO-8859-1).
club_df = pd.read_csv("C:/Users/guygi/OneDrive/Bureau/concaf_analytics/datasets/Club.csv", encoding='ISO-8859-1')

# Enrich every club seen in the squads with its reference attributes and
# order the rows by BestClub, descending.
club_final_df = (
    club_players_df
    .merge(club_df, how='left', on='ClubName')
    .sort_values(by=['BestClub'], ascending=False)
)

# Clubs with no match in the reference table get explicit defaults instead of NaN.
club_final_df = club_final_df.assign(
    TopLeague=club_final_df['TopLeague'].fillna(False),
    BestClub=club_final_df['BestClub'].fillna(False),
    Country=club_final_df['Country'].fillna('No'),
)

# Final Player table: bring in the club columns, drop the local PlayerId and
# the columns that are not kept, then expose the API id as PlayerId.
players_df = (
    players_df
    .merge(club_final_df, how='left', on='ClubName')
    .drop(columns=['PlayerId', 'ClubName', 'Country', 'BestClub', 'TopLeague', 'JerseyName', 'TransfertMarketPlayerName'])
    .rename(columns={'TransfertMarketId': 'PlayerId'})
)

In [869]:
# Write the final Player and Club tables as CSV (utf-8-sig encoding, no index column).
players_df.to_csv(
    path_or_buf="C:/Users/guygi/OneDrive/Bureau/concaf_analytics/datasets/clean/Player.csv",
    index=False,
    encoding='utf-8-sig',
)
club_final_df.to_csv(
    path_or_buf="C:/Users/guygi/OneDrive/Bureau/concaf_analytics/datasets/clean/Club.csv",
    index=False,
    encoding='utf-8-sig',
)