# Find Similar Players

### Import Libraries

In [38]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
import numpy as np

### Reading Datasets

In [39]:
players_df = pd.read_csv('players_df.csv')
gk_df = pd.read_csv('goalkeepers_df.csv')
eafc_pl_df = pd.read_csv('players_fifa_df.csv')
eafc_gk_df = pd.read_csv('goalkeepers_fifa_df.csv')

### Dropping Columns not to consider

In [40]:
eafc_pl_df = eafc_pl_df.drop(['Foot', 'Height', 'Weight', 'Nationality',
       'Expiration', 'Non-PenaltyGoals', 'npxG:Non-PenaltyxG', 'ShotsTotal',
       'Assists', 'xAG:Exp.AssistedGoals', 'npxG+xAG', 'Shot-CreatingActions',
       'PassesAttempted', 'PassCompletion%', 'ProgressivePasses',
       'ProgressiveCarries', 'SuccessfulTake-Ons', 'Touches(AttPen)',
       'ProgressivePassesRec', 'Tackles', 'Interceptions_x', 'Blocks',
       'Clearances', 'AerialsWon','Birth', 'Role'], axis=1)
eafc_pl_df = eafc_pl_df.dropna()
eafc_gk_df = eafc_gk_df.drop(['Foot', 'Height', 'Weight', 'Nationality',
       'Expiration', 'PSxG-GA', 'GoalsAgainst', 'SavePercentage', 'PSxG/SoT',
       'Save%(PenaltyKicks)', 'CleanSheetPercentage', 'Touches', 'Launch%',
       'GoalKicks', 'Avg.LengthofGoalKicks', 'CrossesStopped%',
       'Def.ActionsOutsidePen.Area', 'Avg.DistanceofDef.Actions', 'Role',
       'Birth'], axis=1)
eafc_gk_df = eafc_gk_df.dropna()

In [41]:
def find_similar_players(df: pd.DataFrame, player_index=None, player_name=None, top_n=5) -> pd.DataFrame:
    '''
    Finds the top_n similar players in the dataframe given to the player specified using cosine similarity
    
    Parameters
    ----------
    df : dict
        Dictionary containing players to be compared
    player_index : int
        Index in the dataframe of the player to get similar players of
    player_name : dict
        Name of the player to get similar players of
    top_n : int
        Number of most similar players to be returned
        
    Returns
    ----------
    df: dictionary
        Dictionary having top_n similar players in the dataframe
    '''
    if player_index is None and player_name is None:
        return None
    if player_name is not None:
        player_index = df.index[df['Player'] == player_name]
        if len(player_index)==0:
            print('Player not found')
            raise ValueError('Player not found')
    df_copy = df.drop(['Age', 'Wage', 'Value'], axis=1)
    df_copy = df_copy.select_dtypes(include=['float64'])
    features = df_copy.columns
    scaler = StandardScaler()
    players_scaled = scaler.fit_transform(df_copy)
    players_scaled_df = pd.DataFrame(players_scaled, columns=features, index=df_copy.index)
    player_features = players_scaled_df.iloc[player_index].values
    distances = np.sqrt(np.sum((players_scaled_df - player_features) ** 2, axis=1))
    similar_indices = np.argsort(distances)[1:top_n+1]    
    return df.iloc[similar_indices]

In [46]:
df_fin = find_similar_players(players_df, player_name = 'Erling Haaland')
df_fin[['Player', 'Club', 'League', 'Age', 'Position', 'Value', 'Age']]


Unnamed: 0,Player,Club,League,Age,Position,Value,Age.1
1038,Dušan Vlahović,Juventus,Italian Serie A,24,Centre-Forward,65.0,24
449,Artem Dovbyk,Girona,Spanish La Liga,26,Centre-Forward,30.0,26
428,Robert Lewandowski,Barcelona,Spanish La Liga,35,Centre-Forward,15.0,35
460,Álvaro Morata,Atlético Madrid,Spanish La Liga,31,Centre-Forward,20.0,31
423,Joselu,Real Madrid,Spanish La Liga,34,Centre-Forward,5.0,34


In [47]:
df_fin = find_similar_players(eafc_pl_df, player_name = 'Erling Haaland')
df_fin[['Player', 'Club', 'League', 'Age', 'Position', 'Value', 'Age']]

Unnamed: 0,Player,Club,League,Age,Position,Value,Age.1
1139,Victor Osimhen,Napoli,Italian Serie A,25,Centre-Forward,110.0,25
22,Darwin Núñez,Liverpool,English Premier League,24,Centre-Forward,70.0,24
90,Callum Wilson,Newcastle United,English Premier League,32,Centre-Forward,13.0,32
1021,Lautaro Martinez,Internazionale,Italian Serie A,26,Centre-Forward,110.0,26
1060,Dušan Vlahović,Juventus,Italian Serie A,24,Centre-Forward,65.0,24
