# Find Similar Players

### Import Libraries

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy.spatial import distance
import numpy as np

### Reading Datasets

In [3]:
players_df = pd.read_csv('players_df.csv')
gk_df = pd.read_csv('goalkeepers_df.csv')
eafc_pl_df = pd.read_csv('players_fifa_df.csv')
eafc_gk_df = pd.read_csv('goalkeepers_fifa_df.csv')

### Dropping Columns not to consider

In [4]:
eafc_pl_df = eafc_pl_df.drop(['Foot', 'Height', 'Weight', 'Nationality',
       'Expiration', 'Non-PenaltyGoals', 'npxG:Non-PenaltyxG', 'ShotsTotal',
       'Assists', 'xAG:Exp.AssistedGoals', 'npxG+xAG', 'Shot-CreatingActions',
       'PassesAttempted', 'PassCompletion%', 'ProgressivePasses',
       'ProgressiveCarries', 'SuccessfulTake-Ons', 'Touches(AttPen)',
       'ProgressivePassesRec', 'Tackles', 'Interceptions_x', 'Blocks',
       'Clearances', 'AerialsWon','Birth', 'Role'], axis=1)
eafc_pl_df = eafc_pl_df.dropna()
eafc_gk_df = eafc_gk_df.drop(['Foot', 'Height', 'Weight', 'Nationality', 'Wage',
       'Expiration', 'PSxG-GA', 'GoalsAgainst', 'SavePercentage', 'PSxG/SoT',
       'Save%(PenaltyKicks)', 'CleanSheetPercentage', 'Touches', 'Launch%',
       'GoalKicks', 'Avg.LengthofGoalKicks', 'CrossesStopped%',
       'Def.ActionsOutsidePen.Area', 'Avg.DistanceofDef.Actions', 'Role',
       'Birth'], axis=1)
eafc_gk_df = eafc_gk_df.dropna()

In [5]:
def find_similar_players(df: pd.DataFrame, player_index=None, player_name=None, top_n=5) -> pd.DataFrame:
    '''
    Finds the top_n similar players in the dataframe given to the player specified using cosine similarity
    
    Parameters
    ----------
    df : dict
        Dictionary containing players to be compared
    player_index : int
        Index in the dataframe of the player to get similar players of
    player_name : dict
        Name of the player to get similar players of
    top_n : int
        Number of most similar players to be returned
        
    Returns
    ----------
    df: dictionary
        Dictionary having top_n similar players in the dataframe
    '''
    if player_index is None and player_name is None:
        return None
    if player_name is not None:
        player_index = df.index[df['Player'] == player_name]
    df_copy = df.drop(['Age', 'Wage', 'Value'], axis=1)
    df_copy = df_copy.select_dtypes(include=['float64'])
    features = df_copy.columns
    scaler = StandardScaler()
    players_scaled = scaler.fit_transform(df_copy)
    players_scaled_df = pd.DataFrame(players_scaled, columns=features, index=df_copy.index)
    player_features = players_scaled_df.iloc[player_index].values
    distances = np.sqrt(np.sum((players_scaled_df - player_features) ** 2, axis=1))
    similar_indices = np.argsort(distances)[1:top_n+1]    
    return df.iloc[similar_indices]

In [6]:
df_fin = find_similar_players(players_df, player_name = 'Karim Adeyemi')
df_fin[['Player', 'Club', 'League', 'Age', 'Position', 'Value', 'Age']]


Unnamed: 0,Player,Club,League,Age,Position,Value,Age.1
1012,Christian Pulisic,Milan,Italian Serie A,25,Right Winger,32.0,25
108,Jarrod Bowen,West Ham United,English Premier League,27,Right Winger,50.0,27
135,Simon Adingra,Brighton & Hove Albion,English Premier League,22,Left Winger,30.0,22
82,Anthony Gordon,Newcastle United,English Premier League,23,Left Winger,50.0,23
1039,Federico Chiesa,Juventus,Italian Serie A,26,Left Winger,40.0,26


In [9]:
df_fin = find_similar_players(eafc_pl_df, player_name = 'Erling Haaland')
df_fin[['Player', 'Club', 'League', 'Age', 'Position', 'Value', 'Age']]

Unnamed: 0,Player,Club,League,Age,Position,Value,Age.1
1264,Pol Lirola,Frosinone,Italian Serie A,26,Right-Back,2.0,26
1240,Bartosz Bereszyński,Empoli,Italian Serie A,31,Right-Back,1.5,31
739,Gabriel Gudmundsson,Lille,French Ligue 1,25,Left-Back,4.0,25
990,Olivier Deman,Werder Bremen,German Bundesliga,24,Left Midfield,6.0,24
716,Kenny Lala,Brest,French Ligue 1,32,Right-Back,3.0,32
