In [38]:
import pandas as pd
import numpy as np
from collections import Counter
from sklearn.preprocessing import OneHotEncoder
import os
import glob
from tqdm import tqdm
import matplotlib.pyplot as plt

In [39]:
# normalize the data ?
# noramlize the rates between 0 and 1

# don't keep the names
# player positions: remove it (remove nation position) and keep club position --> prescription: constraint on the number of players per zone. Limit to Attack / Midfield / Defend / Goalkeeper
# keep overall and potential, maybe replace potential by potential - overall
# keep club joined year, and club contract valid year
# work rate: replace by two columns, in numerical values ? or remove it
# get rid of improvements 
# keep league levels

# remove ids, urls, club / league names, club_loaned_from, nationalities / nations (id, name etc.), player tags and player traits, 

In [40]:
# define function so we can easily apply it to all the datasets
def data_preprocessing(data):
    
    # display all columns of the dataframe
    pd.set_option('display.max_columns', None)

    # remove unnecessary columns
    data.drop(['sofifa_id', 'player_url', 'long_name', 'club_position', 'dob', 'club_team_id', 
        #'club_name',
        'league_name', 'club_jersey_number', 'club_loaned_from', 'nationality_id', 'nationality_name', 
        'nation_team_id', 'nation_position', 'nation_jersey_number', 'body_type', 'real_face', 'player_tags', 'player_traits',
        'goalkeeping_speed', 'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm',
        'rm', 'lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb', 'gk', 'player_face_url', 'club_logo_url', 'club_flag_url', 
        'nation_logo_url', 'nation_flag_url'], axis = 1, inplace = True)

    # instead of club position, I keep player_positions and just pick the first element. The reason is that in club_position there are also SUB and RES
    # keep just the first position
    data['player_positions'] = data['player_positions'].apply(lambda x: x.split(',')[0])    

    # translate club position to role, dividing goalkeepers, defenders, midfielders and attackers
    attackers = ['ST', 'LW', 'RW', 'CF', 'LF', 'RF', 'RS', 'LS']
    midfielders = ['CAM', 'CM', 'CDM', 'RCM','LM', 'RM', 'LAM', 'RAM', 'LW', 'RW', 'RDM', 'LCM','LDM']
    defenders = ['CB', 'LB', 'RB', 'LCB', 'RCB', 'LWB', 'RWB']
    goalkeepers = ['GK']

    data['player_positions'] = data['player_positions'].apply(lambda x: 'A' if x in attackers else 
        ('M' if x in midfielders else ('D' if x in defenders else ('G' if x in goalkeepers else x))))

    # apply one hot encoding to player positions
    dummies = pd.get_dummies(data['player_positions'], prefix = 'position')
    data.drop(['player_positions'], axis = 1, inplace = True) # drop original column
    data = pd.concat([data, dummies], axis = 1) # concat the dummy columns


    # only keep year of when club was joined
    data['club_joined'] = [int(data.loc[0,'club_joined'].split('-')[0]) for i in range(len(data['club_joined']))]
    
    # one hot encoding of preferred foot, 0 = left, 1 = right
    data['preferred_foot'] = data['preferred_foot'].apply(lambda x: 0 if x == 'Left' else 1)

    # ordinal encoding of work rate (1 = Low, 2 = Medium, 3 = High), split between attack and defense. 
    data['attack_work_rate'] = data['work_rate'].apply(lambda x: 1 if x.split('/')[0] == 'Low' else (2 if x.split('/')[0] == 'Medium' else 3))
    data['defense_work_rate'] = data['work_rate'].apply(lambda x: 1 if x.split('/')[1] == 'Low' else (2 if x.split('/')[1] == 'Medium' else 3))
    data.drop(['work_rate'], axis = 1, inplace = True)

    # remove nan values
    data = data.dropna()

    # return the dataframe
    return data

In [41]:
# save dataframes from different years. Each entry of the dataframe has the cleaned
# fifa data for a specific year
total_data = []

for year in range(20, 22+1):
    # read the data
    path = '../data/fifa/players_{}.csv'.format(year)
    data = pd.read_csv(path)

    # preprocess the data
    total_data.append(data_preprocessing(data))

# for each year, add the year column to the dataframe
for i in range(len(total_data)):
    total_data[i]['year'] = 2000 + i

  data = pd.read_csv(path)


In [42]:
total_data[0].head()

Unnamed: 0,short_name,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,club_name,league_level,club_joined,club_contract_valid_until,preferred_foot,weak_foot,skill_moves,international_reputation,release_clause_eur,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,position_A,position_D,position_G,position_M,attack_work_rate,defense_work_rate,year
0,L. Messi,94,94,95500000.0,560000.0,32,170,72,FC Barcelona,1.0,2004,2021.0,0,4,4,5,195800000.0,87.0,92.0,92.0,96.0,39.0,66.0,88,95,70,92,88,97,93,94,92,96,91,84,93,95,95,86,68,75,68,94,48,40,94,94,75,96,33,37,26,6,11,15,14,8,1,0,0,0,2,1,2000
1,Cristiano Ronaldo,93,93,58500000.0,410000.0,34,187,83,Juventus,1.0,2004,2022.0,1,4,5,5,96500000.0,90.0,93.0,82.0,89.0,35.0,78.0,84,94,89,83,87,89,81,76,77,92,89,91,87,96,71,95,95,85,78,93,63,29,95,82,85,95,28,32,24,7,11,15,14,11,1,0,0,0,3,1,2000
2,Neymar Jr,92,92,105500000.0,290000.0,27,175,68,Paris Saint-Germain,1.0,2004,2022.0,1,5,5,5,195200000.0,91.0,85.0,87.0,95.0,32.0,58.0,87,87,62,87,87,96,88,87,81,95,94,89,96,92,84,80,61,81,49,84,51,36,87,90,90,94,27,26,29,9,9,15,15,11,1,0,0,0,3,2,2000
3,E. Hazard,91,91,90000000.0,470000.0,28,175,74,Real Madrid CF,1.0,2004,2024.0,1,4,4,4,184500000.0,91.0,83.0,86.0,94.0,35.0,66.0,81,84,61,89,83,95,83,79,83,94,94,88,95,90,94,82,56,84,63,80,54,41,87,89,88,91,34,27,22,11,12,6,8,8,1,0,0,0,3,2,2000
4,K. De Bruyne,91,91,90000000.0,370000.0,28,181,70,Manchester City,1.0,2004,2023.0,1,5,4,4,166500000.0,76.0,86.0,92.0,86.0,61.0,78.0,93,82,55,92,82,86,85,83,91,91,77,76,78,91,76,91,63,89,74,90,76,61,88,94,79,91,68,58,51,15,13,5,10,13,0,0,0,1,3,3,2000


####################################################################################################

Now, cleaning of the transfer data

In [43]:
# read all transfer files
path = "../data/Transfers/"
csv_files = glob.glob(os.path.join(path, "*.csv"))

In [44]:
# create unique dataframe with all the transfers
transfers_list = []

# loop over the list of csv files
for f in csv_files:
      
    # read the csv file
    df = pd.read_csv(f)

    # append the dataframe to the list
    transfers_list.append(df)

transfers = pd.concat(transfers_list).reset_index(drop = True)

In [45]:
# only keep transfers happened in 2020, 2021, 2022
transfers = transfers.loc[transfers['year'].isin([2020, 2021, 2022])]

# reset index
transfers.reset_index(inplace = True, drop = True)

# just keep transfer movement in, because there should be a match between in and out, but the ones that just have out mean they are not playing
transfers = transfers[transfers["transfer_movement"] == "in"]

# remove columns we are not interested in
transfers.drop(['age', 'position', 
    #'club_involved_name', 
    'transfer_movement', 'league_name', 'season'], axis = 1, inplace = True)

# one hot encoding of transfer period, 1 = summer, 0 = winter
transfers['transfer_period'] = transfers['transfer_period'].apply(lambda x: 1 if x.lower() == 'summer' else 0)

# check if loan is in fee
transfers['loan'] = transfers['fee'].apply(lambda x: 1 if 'loan' in x.lower() else 0)

# get rid of fee column, and rename fee_cleaned to fee
transfers.drop(['fee'], axis = 1, inplace = True)
transfers.rename(columns = {'fee_cleaned': 'fee'}, inplace = True)

# get rid of loan transfers
transfers = transfers[transfers['loan'] == 0]
transfers.drop(['loan'], axis = 1, inplace = True)

# rename club_name to new_club and club_involved_name to old_club
transfers.rename(columns = {'club_name': 'new_club'}, inplace = True)
transfers.rename(columns = {'club_involved_name': 'old_club'}, inplace = True)

# drop duplicates, meaning player_name that appears twice. We keep the last one since it's the most recent transfer
#transfers.drop_duplicates(subset = ['player_name'], keep = 'last', inplace = True)

# reset index
transfers.reset_index(inplace = True, drop = True)

# remove NaN values
transfers.dropna(inplace = True)

# Now we need to join the two datasets based on player, if there is a doubt, we keep the club_name, if still we are not sure, we drop it

## Analysis

In [46]:
transfers_22 = transfers[transfers['year'] == 2022].reset_index(drop = True)
transfers_21 = transfers[transfers['year'] == 2021].reset_index(drop = True)
transfers_20 = transfers[transfers['year'] == 2020].reset_index(drop = True)

# drop year column when creating ratings_20, ratings_21, ratings_22
ratings_22 = total_data[2].drop(['year'], axis = 1).reset_index(drop = True)
ratings_21 = total_data[1].drop(['year'], axis = 1).reset_index(drop = True)
ratings_20 = total_data[0].drop(['year'], axis = 1).reset_index(drop = True)

In [47]:
transfers_22.head()

Unnamed: 0,new_club,player_name,old_club,transfer_period,fee,year
0,Bayern Munich,Matthijs de Ligt,Juventus,1,67.0,2022
1,Bayern Munich,Sadio Mané,Liverpool,1,32.0,2022
2,Bayern Munich,Mathys Tel,Stade Rennais,1,20.0,2022
3,Bayern Munich,Ryan Gravenberch,Ajax,1,18.5,2022
4,Bayern Munich,Noussair Mazraoui,Ajax,1,0.0,2022


In [48]:
ratings_22[ratings_22.short_name == "S. Mané"]

Unnamed: 0,short_name,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,club_name,league_level,club_joined,club_contract_valid_until,preferred_foot,weak_foot,skill_moves,international_reputation,release_clause_eur,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,position_A,position_D,position_G,position_M,attack_work_rate,defense_work_rate
12,S. Mané,89,89,101000000.0,270000.0,29,175,69,Liverpool,1.0,2021,2023.0,1,4,4,4,186900000.0,91.0,83.0,80.0,89.0,44.0,77.0,78,86,84,84,75,90,77,64,71,88,93,90,93,91,88,83,87,85,72,80,75,35,90,84,71,84,42,42,38,10,10,15,7,14,1,0,0,0,3,2


In [49]:
#!pip install fuzzywuzzy

Collecting fuzzywuzzy
  Using cached fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy
Successfully installed fuzzywuzzy-0.18.0


In [50]:
# player_name from transfers is not the same as player_name from fifa data, so we need to match them
# we can use a metric to measure the similarity between the two names, and then match the ones that are the most similar
# use fuzzywuzzy to get the similarity between two strings
from fuzzywuzzy import fuzz

# function to get the similarity between two strings
def get_similarity(a, b):
    return fuzz.token_sort_ratio(a, b)
    

# function to get the most similar string from a list of strings
def get_most_similar(string, string_list):
    # get the similarity between the string and each string in the list
    similarity = [get_similarity(string, s) for s in string_list]

    # get the index of the most similar string
    max_index = np.argmax(similarity)

    # return the most similar string
    return string_list[max_index]


def get_short_name(player_name):
    # if there is no firts name, return the last name
    if len(player_name.split()) == 1:
        return player_name
    # else, the first name is the first word, and the last name is all the other words
    else:
        first_name = player_name.split()[0]
        last_name = player_name.split()[1:]
        return first_name[0] + ". " + " ".join(last_name)


In [51]:
def map_transfers(transfers, ratings):
    # let's try to map the club names from the transfers dataframe to the club names from the fifa dataframe
    clubs_names = transfers.old_club.unique()
    fifa_clubs = ratings.club_name.unique()

    # create a dictionary to map the club names
    club_name_dict = {}

    # loop over the list of clubs
    for club in tqdm(clubs_names):
        # get the most similar club name from the fifa dataframe
        most_similar_club = get_most_similar(club, fifa_clubs)

        # add the mapping to the dictionary
        club_name_dict[club] = most_similar_club

    # map the club names
    transfers['club_name'] = transfers['old_club'].map(club_name_dict)



    # let's try to map the player names from the transfers dataframe to the player names from the fifa dataframe
    # First possibility to map them: in the ratings dataframe, the short_name looks like "L. Messi", so we can use the first letter of the first name and the last name to match the players
    # if not, we can use the previous functions to match the players

    # get the short name of the players
    transfers['short_name'] = transfers['player_name'].apply(get_short_name)
    
    return transfers


# second function, only if necessary
def add_name_mapping(transfers, ratings):
     # let's try to map the player names from the transfers dataframe to the player names from the fifa dataframe
    # use the get_most_similar function to get the most similar player name from the fifa dataframe
    # create an empty column "player_name_mapped" that we will add to transfers at the end
    player_name_mapped = []

    transfers_info = transfers[['player_name', 'club_name']].values
    ratings_info = ratings[['short_name', 'club_name']].values

    # loop over the list of players
    for player in tqdm(transfers_info):
        # check only the players that play in the same club from ratings
        ratings_players = ratings_info[ratings_info[:, 1] == player[1]]

        # get the most similar player name from the fifa dataframe
        most_similar_player = get_most_similar(player[0], ratings_players[:, 0])

        # add the most similar player name to the list
        player_name_mapped.append(most_similar_player)

    # add the list to the transfers dataframe
    transfers['player_name_mapped'] = player_name_mapped

    return transfers

In [52]:
# let's map the transfers to the fifa data
transfers_22 = map_transfers(transfers_22, ratings_22)
transfers_21 = map_transfers(transfers_21, ratings_21)
transfers_20 = map_transfers(transfers_20, ratings_20)


100%|██████████| 461/461 [00:02<00:00, 210.61it/s]
100%|██████████| 515/515 [00:02<00:00, 213.47it/s]
100%|██████████| 498/498 [00:02<00:00, 223.48it/s]


In [53]:
# function to merge the transfers with the fifa data
def merge(transfers, ratings):
    # keep in memory the initial transfers columns
    transfers_columns = transfers.columns

    # first, try to merge using the short_name and club_name
    merged = pd.merge(transfers, ratings, on = ['short_name', 'club_name'], how = 'left', indicator=True)

    # create a dataframe for the rows where merge==both and one for the rows where merge==left_only
    first_merge_success = merged[merged['_merge'] == 'both'].reset_index(drop = True)
    first_merge_fail = merged[merged['_merge'] == 'left_only'].reset_index(drop = True)

    # drop the _merge column
    first_merge_success.drop(['_merge'], axis = 1, inplace = True)
    first_merge_fail.drop(['_merge'], axis = 1, inplace = True)
    
    # get the transfers data corresponding to the first merge fail
    first_merge_fail_transfers = first_merge_fail[transfers_columns].reset_index(drop = True)

    # use the add_name_mapping function to map the player names in first_merge_fail_transfers
    first_merge_fail_transfers = add_name_mapping(first_merge_fail_transfers, ratings)
    # rename the short name to previous_short_name and the player_name_mapped to short_name
    first_merge_fail_transfers.rename(columns = {'short_name': 'previous_short_name', 'player_name_mapped': 'short_name'}, inplace = True)

    # try a new merge, and only keep the inner merge
    second_merge = pd.merge(first_merge_fail_transfers, ratings, on = ['short_name', 'club_name'], how = 'inner')

    # similarities
    first_merge_success['name_similarity'] = first_merge_success.apply(lambda x: get_similarity(x['player_name'], x['short_name']), axis = 1)
    first_merge_success['club_similarity'] = first_merge_success.apply(lambda x: get_similarity(x['old_club'], x['club_name']), axis = 1)
    second_merge['name_similarity'] = second_merge.apply(lambda x: get_similarity(x['player_name'], x['short_name']), axis = 1)
    second_merge['club_similarity'] = second_merge.apply(lambda x: get_similarity(x['old_club'], x['club_name']), axis = 1)
    
    # drop the columns for which the similarity score between old_club and club_name is less than 75% of the similarity score in the first merge success
    club_similarity_threshold = first_merge_success['club_similarity'].quantile(0.25)
    second_merge = second_merge[second_merge['club_similarity'] >= club_similarity_threshold].reset_index(drop = True)
    
    # drop the columns for which the similarity score between player_name and short_name is less than 75% of the similarity score in the first merge success
    name_similarity_threshold = first_merge_success['name_similarity'].quantile(0.25)
    second_merge = second_merge[second_merge['name_similarity'] >= name_similarity_threshold].reset_index(drop = True)

    # concatenate the two dataframes
    final_merge = pd.concat([first_merge_success, second_merge], axis = 0).reset_index(drop = True)
    
    # put previous_short_name and similarity in the first columns
    final_merge = final_merge[['previous_short_name', 'name_similarity', 'club_similarity'] + final_merge.columns[:-3].tolist()]

    # drop duplicates
    final_merge.drop_duplicates(inplace = True)

    return final_merge

In [56]:
merged_22 = merge(transfers_22, ratings_22)
merged_21 = merge(transfers_21, ratings_21)
merged_20 = merge(transfers_20, ratings_20)

# order by similarity, to check if the merge was successful
merged_22.sort_values(by = 'name_similarity', ascending = True, inplace = True)
merged_21.sort_values(by = 'name_similarity', ascending = True, inplace = True)
merged_20.sort_values(by = 'name_similarity', ascending = True, inplace = True)

# print merged_22 where previous_short_name is not null
#merged_22[merged_22['previous_short_name'].notnull()].head(40)
#merged_21[merged_21['previous_short_name'].notnull()].head(40)
#merged_20[merged_20['previous_short_name'].notnull()].head(40)

100%|██████████| 1322/1322 [00:00<00:00, 2550.47it/s]
100%|██████████| 1336/1336 [00:00<00:00, 2517.14it/s]
100%|██████████| 1270/1270 [00:00<00:00, 2656.33it/s]


In [57]:
# print all shapes
print(merged_22.shape)
print(merged_21.shape)
print(merged_20.shape)

(459, 72)
(480, 72)
(405, 72)


In [74]:
merged_22[(merged_22.fee==0) & (merged_22.club_contract_valid_until==2023)]
#merged_21[merged_21.fee==0].club_contract_valid_until.value_counts()
#merged_20[merged_20.fee==0].club_contract_valid_until.value_counts()

Unnamed: 0,previous_short_name,name_similarity,club_similarity,new_club,player_name,old_club,transfer_period,fee,year,club_name,short_name,overall,potential,value_eur,wage_eur,age,height_cm,weight_kg,league_level,club_joined,club_contract_valid_until,preferred_foot,weak_foot,skill_moves,international_reputation,release_clause_eur,pace,shooting,passing,dribbling,defending,physic,attacking_crossing,attacking_finishing,attacking_heading_accuracy,attacking_short_passing,attacking_volleys,skill_dribbling,skill_curve,skill_fk_accuracy,skill_long_passing,skill_ball_control,movement_acceleration,movement_sprint_speed,movement_agility,movement_reactions,movement_balance,power_shot_power,power_jumping,power_stamina,power_strength,power_long_shots,mentality_aggression,mentality_interceptions,mentality_positioning,mentality_vision,mentality_penalties,mentality_composure,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,position_A,position_D,position_G,position_M,attack_work_rate,defense_work_rate
499,,57,90,FC Emmen,Metehan Güçlü,Stade Rennais,1,0.0,2022,Stade Rennais FC,M. Güçlü,61.0,66.0,500000.0,7000.0,22.0,182.0,70.0,1.0,2021.0,2023.0,0.0,2.0,2.0,1.0,988000.0,75.0,60.0,54.0,61.0,23.0,56.0,42.0,60.0,62.0,60.0,56.0,59.0,47.0,48.0,49.0,60.0,73.0,76.0,75.0,58.0,66.0,60.0,71.0,48.0,64.0,59.0,44.0,21.0,59.0,60.0,66.0,51.0,18.0,16.0,20.0,7.0,13.0,7.0,10.0,13.0,1.0,0.0,0.0,0.0,2.0,2.0
128,,70,58,AS Roma,Nemanja Matić,Man Utd,1,0.0,2022,Manchester United,N. Matić,79.0,79.0,11500000.0,100000.0,32.0,194.0,83.0,1.0,2021.0,2023.0,0.0,3.0,3.0,3.0,21300000.0,36.0,67.0,75.0,73.0,80.0,73.0,70.0,59.0,74.0,82.0,71.0,75.0,70.0,63.0,82.0,79.0,33.0,39.0,47.0,80.0,47.0,84.0,63.0,54.0,82.0,67.0,76.0,80.0,63.0,68.0,64.0,83.0,82.0,81.0,72.0,7.0,15.0,12.0,14.0,9.0,0.0,0.0,0.0,1.0,1.0,2.0
515,,73,100,RKC Waalwijk,Patrick Vroegh,Vitesse,1,0.0,2022,Vitesse,P. Vroegh,64.0,75.0,1300000.0,3000.0,21.0,175.0,67.0,1.0,2021.0,2023.0,0.0,3.0,2.0,1.0,2200000.0,58.0,54.0,63.0,64.0,56.0,58.0,52.0,48.0,46.0,68.0,50.0,61.0,57.0,57.0,67.0,67.0,67.0,51.0,67.0,63.0,67.0,64.0,56.0,64.0,56.0,57.0,58.0,54.0,61.0,63.0,44.0,64.0,55.0,61.0,55.0,10.0,7.0,5.0,12.0,11.0,0.0,0.0,0.0,1.0,2.0,2.0
112,,74,100,West Bromwich Albion,Martin Kelly,Crystal Palace,1,0.0,2022,Crystal Palace,M. Kelly,74.0,74.0,2900000.0,39000.0,31.0,191.0,77.0,1.0,2021.0,2023.0,1.0,3.0,2.0,2.0,5500000.0,50.0,44.0,59.0,61.0,76.0,68.0,66.0,38.0,77.0,68.0,39.0,58.0,60.0,35.0,64.0,67.0,49.0,51.0,53.0,75.0,49.0,61.0,70.0,49.0,77.0,38.0,71.0,73.0,42.0,39.0,58.0,72.0,76.0,77.0,75.0,7.0,11.0,8.0,7.0,8.0,0.0,1.0,0.0,0.0,1.0,2.0
500,,74,90,Excelsior Rotterdam,Yassin Ayoub,Panathinaikos,1,0.0,2022,Panathinaikos FC,Y. Ayoub,69.0,70.0,1600000.0,500.0,27.0,174.0,70.0,1.0,2021.0,2023.0,0.0,3.0,3.0,1.0,3600000.0,69.0,64.0,71.0,71.0,65.0,68.0,73.0,59.0,52.0,71.0,64.0,70.0,71.0,69.0,67.0,70.0,71.0,68.0,74.0,70.0,85.0,73.0,38.0,69.0,68.0,65.0,74.0,64.0,66.0,72.0,66.0,70.0,65.0,69.0,67.0,16.0,10.0,8.0,16.0,9.0,0.0,0.0,0.0,1.0,2.0,2.0
205,,74,100,Getafe CF,Fabrizio Angileri,River Plate,1,0.0,2022,River Plate,F. Angileri,77.0,78.0,11000000.0,17000.0,27.0,184.0,84.0,1.0,2021.0,2023.0,0.0,3.0,3.0,1.0,16200000.0,80.0,68.0,74.0,74.0,70.0,79.0,81.0,63.0,61.0,73.0,69.0,74.0,79.0,63.0,75.0,71.0,78.0,82.0,82.0,74.0,70.0,78.0,60.0,90.0,77.0,69.0,77.0,72.0,71.0,68.0,65.0,74.0,69.0,72.0,72.0,8.0,8.0,11.0,9.0,8.0,0.0,1.0,0.0,0.0,3.0,3.0
62,,74,72,SV Werder Bremen,Oliver Burke,Sheff Utd,1,0.0,2022,Sheffield United,O. Burke,67.0,72.0,1600000.0,9000.0,24.0,188.0,82.0,2.0,2021.0,2023.0,1.0,3.0,3.0,1.0,3200000.0,88.0,63.0,62.0,66.0,37.0,71.0,63.0,64.0,54.0,65.0,52.0,71.0,62.0,62.0,56.0,59.0,85.0,91.0,69.0,68.0,69.0,74.0,63.0,68.0,79.0,56.0,58.0,36.0,66.0,59.0,42.0,60.0,34.0,38.0,27.0,13.0,11.0,7.0,7.0,15.0,1.0,0.0,0.0,0.0,2.0,2.0
810,O. Yokuslu,76,90,West Bromwich Albion,Okay Yokuslu,Celta de Vigo,1,0.0,2022,RC Celta de Vigo,O. Yokuşlu,76.0,79.0,8500000.0,21000.0,27.0,191.0,79.0,1.0,2021.0,2023.0,1.0,3.0,3.0,1.0,18500000.0,66.0,70.0,71.0,72.0,74.0,83.0,67.0,65.0,78.0,74.0,70.0,73.0,72.0,65.0,72.0,76.0,62.0,69.0,61.0,70.0,60.0,79.0,69.0,87.0,83.0,75.0,81.0,76.0,68.0,71.0,60.0,73.0,71.0,75.0,70.0,12.0,16.0,10.0,13.0,7.0,0.0,0.0,0.0,1.0,2.0,3.0
535,,76,100,Olympique Marseille,Alexis Sánchez,Inter,1,0.0,2022,Inter,A. Sánchez,80.0,80.0,17000000.0,105000.0,32.0,168.0,62.0,1.0,2021.0,2023.0,1.0,4.0,4.0,4.0,28900000.0,78.0,76.0,78.0,83.0,43.0,68.0,76.0,74.0,59.0,79.0,82.0,84.0,78.0,74.0,77.0,83.0,80.0,76.0,84.0,79.0,87.0,80.0,85.0,68.0,68.0,76.0,64.0,46.0,81.0,80.0,69.0,75.0,38.0,44.0,35.0,10.0,10.0,15.0,12.0,13.0,1.0,0.0,0.0,0.0,2.0,2.0
838,Ó. Mingueza,76,86,Celta de Vigo,Óscar Mingueza,Barcelona,1,0.0,2022,FC Barcelona,Mingueza,75.0,83.0,12000000.0,76000.0,22.0,184.0,77.0,1.0,2021.0,2023.0,1.0,3.0,2.0,1.0,27000000.0,78.0,35.0,66.0,60.0,75.0,75.0,68.0,22.0,74.0,76.0,36.0,48.0,42.0,39.0,69.0,69.0,74.0,82.0,69.0,74.0,62.0,61.0,75.0,80.0,73.0,35.0,74.0,77.0,44.0,55.0,32.0,70.0,74.0,76.0,72.0,9.0,10.0,15.0,8.0,13.0,0.0,1.0,0.0,0.0,3.0,2.0


In [22]:
# print files to csv
merged_22.to_csv('data/Final/merged_22.csv', index = False)
merged_21.to_csv('data/Final/merged_21.csv', index = False)
merged_20.to_csv('data/Final/merged_20.csv', index = False)