# Player Similarity Prediction

Two players are similar if their attributes are similar. Measuring this helps football teams find a replacement when a player leaves or retires, and it also helps teams scout potential future players. In our model, however, "similar" means that the stats are skewed in a similar fashion, not necessarily that the values themselves are close: Player A can be similar to Player B even if he has 2x the values for shooting, passing, dribbling, pace, physical, and defending.

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Read the player table from the CSV at the path below and display it.
predata = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/players_22.csv",
)
predata

Unnamed: 0,short_name,player_positions,overall,potential,wage_eur,age,club_contract_valid_until,nationality_name,preferred_foot,weak_foot,...,defending_marking_awareness,defending_standing_tackle,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed,value_eur
0,L. Messi,"RW, ST, CF",93,93,320000,34,2023,Argentina,Left,4,...,20,35,24,6,11,15,14,8,0,78000000
1,R. Lewandowski,ST,92,92,270000,32,2023,Poland,Right,4,...,35,42,19,15,6,12,8,10,0,119500000
2,Cristiano Ronaldo,"ST, LW",91,91,270000,36,2023,Portugal,Right,4,...,24,32,24,7,11,15,14,11,0,45000000
3,Neymar Jr,"LW, CAM",91,91,270000,29,2025,Brazil,Right,5,...,35,32,29,9,9,15,15,11,0,129000000
4,K. De Bruyne,"CM, CAM",91,91,350000,30,2025,Belgium,Right,5,...,68,65,53,15,13,5,10,13,0,125500000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,Song Defu,CDM,47,52,1000,22,2021,China PR,Right,3,...,38,43,48,6,10,5,15,13,0,70000
19235,C. Porter,CM,47,59,500,19,2021,Republic of Ireland,Right,3,...,37,44,47,11,12,6,8,10,0,110000
19236,N. Logue,CM,47,55,500,21,2021,Republic of Ireland,Right,3,...,38,44,48,8,6,7,10,6,0,100000
19237,L. Rudden,ST,47,60,500,19,2021,Republic of Ireland,Right,3,...,10,14,11,7,10,7,14,15,0,110000


In [None]:
# Preview the identifying columns: the first two columns of the table plus
# each player's nationality, joined side by side on the row index.
x = predata.iloc[:, :2]
y = predata.loc[:, 'nationality_name']
x.join(y)

Unnamed: 0,short_name,player_positions,nationality_name
0,L. Messi,"RW, ST, CF",Argentina
1,R. Lewandowski,ST,Poland
2,Cristiano Ronaldo,"ST, LW",Portugal
3,Neymar Jr,"LW, CAM",Brazil
4,K. De Bruyne,"CM, CAM",Belgium
...,...,...,...
19234,Song Defu,CDM,China PR
19235,C. Porter,CM,Republic of Ireland
19236,N. Logue,CM,Republic of Ireland
19237,L. Rudden,ST,Republic of Ireland


In [None]:
# Build the feature table: remove the name, nationality, position and
# contract-year columns, then one-hot encode `preferred_foot` into
# `preferred_foot_Left` / `preferred_foot_Right`.
data = pd.get_dummies(
    predata.drop(
        columns=[
            'short_name',
            'nationality_name',
            'player_positions',
            'club_contract_valid_until',
        ],
    ),
    columns=['preferred_foot'],
)
data

Unnamed: 0,overall,potential,wage_eur,age,weak_foot,skill_moves,release_clause_eur,pace,shooting,passing,...,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed,value_eur,preferred_foot_Left,preferred_foot_Right
0,93,93,320000,34,4,4,144300000,85,92,91,...,24,6,11,15,14,8,0,78000000,1,0
1,92,92,270000,32,4,4,197200000,78,92,79,...,19,15,6,12,8,10,0,119500000,0,1
2,91,91,270000,36,4,5,83300000,87,94,80,...,24,7,11,15,14,11,0,45000000,0,1
3,91,91,270000,29,5,5,238700000,91,83,86,...,29,9,9,15,15,11,0,129000000,0,1
4,91,91,350000,30,5,4,232200000,76,86,93,...,53,15,13,5,10,13,0,125500000,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,47,52,1000,22,3,2,114000,58,35,46,...,48,6,10,5,15,13,0,70000,0,1
19235,47,59,500,19,3,2,193000,59,39,50,...,47,11,12,6,8,10,0,110000,0,1
19236,47,55,500,21,3,2,175000,60,37,45,...,48,8,6,7,10,6,0,100000,0,1
19237,47,60,500,19,3,2,239000,68,46,36,...,11,7,10,7,14,15,0,110000,0,1


In [None]:
# DataFrame.astype returns a new frame instead of converting in place, so the
# result has to be assigned back for the cast to take effect.
# NOTE(review): assumes no column contains missing values, since NaN cannot
# be cast to int64 - confirm against the dataset.
data = data.astype('int64')
data.dtypes

overall                        int64
potential                      int64
wage_eur                       int64
age                            int64
weak_foot                      int64
skill_moves                    int64
release_clause_eur             int64
pace                           int64
shooting                       int64
passing                        int64
dribbling                      int64
defending                      int64
physic                         int64
attacking_crossing             int64
attacking_finishing            int64
attacking_heading_accuracy     int64
attacking_short_passing        int64
attacking_volleys              int64
skill_dribbling                int64
skill_curve                    int64
skill_fk_accuracy              int64
skill_long_passing             int64
skill_ball_control             int64
movement_acceleration          int64
movement_sprint_speed          int64
movement_agility               int64
movement_reactions             int64
m

In [None]:
# Min-max scale every feature column so that attributes measured on very
# different scales (e.g. wages vs. skill ratings) become comparable.
scaler = MinMaxScaler()

new_data = scaler.fit_transform(data.to_numpy())
# fit_transform returns a bare array; restore the labels from `data` itself
# rather than a hand-maintained list of names, so the scaled frame cannot
# drift out of sync with the feature table when columns are added or removed.
new_data = pd.DataFrame(new_data, columns=data.columns, index=data.index)

new_data

Unnamed: 0,overall,potential,wage_eur,age,weak_foot,skill_moves,release_clause_eur,pace,shooting,passing,...,defending_sliding_tackle,goalkeeping_diving,goalkeeping_handling,goalkeeping_kicking,goalkeeping_positioning,goalkeeping_reflexes,goalkeeping_speed,value_eur,preferred_foot_Left,preferred_foot_Right
0,1.000000,0.956522,0.914163,0.473684,0.75,0.75,0.386345,0.826087,0.973684,0.970588,...,0.218391,0.044944,0.100000,0.142857,0.133333,0.068182,0.0,0.402034,1.0,0.0
1,0.978261,0.934783,0.771102,0.421053,0.75,0.75,0.527979,0.724638,0.973684,0.794118,...,0.160920,0.146067,0.044444,0.109890,0.066667,0.090909,0.0,0.615962,0.0,1.0
2,0.956522,0.913043,0.771102,0.526316,0.75,1.00,0.223025,0.855072,1.000000,0.808824,...,0.218391,0.056180,0.100000,0.142857,0.133333,0.102273,0.0,0.231923,0.0,1.0
3,0.956522,0.913043,0.771102,0.342105,1.00,1.00,0.639090,0.913043,0.855263,0.897059,...,0.275862,0.078652,0.077778,0.142857,0.144444,0.102273,0.0,0.664933,0.0,1.0
4,0.956522,0.913043,1.000000,0.368421,1.00,0.75,0.621687,0.695652,0.894737,1.000000,...,0.551724,0.146067,0.122222,0.032967,0.088889,0.125000,0.0,0.646891,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19234,0.000000,0.065217,0.001431,0.157895,0.50,0.25,0.000305,0.434783,0.223684,0.308824,...,0.494253,0.044944,0.088889,0.032967,0.144444,0.125000,0.0,0.000314,0.0,1.0
19235,0.000000,0.217391,0.000000,0.078947,0.50,0.25,0.000517,0.449275,0.276316,0.367647,...,0.482759,0.101124,0.111111,0.043956,0.066667,0.090909,0.0,0.000521,0.0,1.0
19236,0.000000,0.130435,0.000000,0.131579,0.50,0.25,0.000469,0.463768,0.250000,0.294118,...,0.494253,0.067416,0.044444,0.054945,0.088889,0.045455,0.0,0.000469,0.0,1.0
19237,0.000000,0.239130,0.000000,0.078947,0.50,0.25,0.000640,0.579710,0.368421,0.161765,...,0.068966,0.056180,0.088889,0.054945,0.133333,0.147727,0.0,0.000521,0.0,1.0


In [None]:
# dataSetI - best player (row 0), dataSetII - one of the worst players
# (row 19234), dataSetIII - second best player (row 1)
from scipy import spatial

# Keep each selection as a 1-row DataFrame: the cdist calls further down
# need 2-D input.
dataSetI = new_data.iloc[0:1, :]
dataSetII = new_data.iloc[19234:19235, :]
dataSetIII = new_data.iloc[1:2, :]
# spatial.distance.cosine is documented for 1-D vectors, so flatten the 1-row
# frames at the call site instead of relying on implicit squeezing of 2-D
# input. Cosine similarity = 1 - cosine distance.
cosine_result_1 = 1 - spatial.distance.cosine(
    dataSetI.to_numpy().ravel(), dataSetII.to_numpy().ravel()
)
cosine_result_1

0.8001408605559942

# Noa Lang vs. Cristiano Ronaldo

In [None]:
# Row 2 is Cristiano Ronaldo in the player table printed earlier; row 837 is
# the player being compared against him.
# NOTE(review): confirm which player sits at row 837.
dataSetI = new_data.iloc[2:3, :]
dataSetII = new_data.iloc[19234:19235, :]
dataSetIII = new_data.iloc[837:838, :]
# spatial.distance.cosine is documented for 1-D vectors, so flatten the 1-row
# frames at the call site; the frames themselves stay 2-D for later cdist use.
cosine_result_1 = 1 - spatial.distance.cosine(
    dataSetI.to_numpy().ravel(), dataSetIII.to_numpy().ravel()
)
cosine_result_1

0.9679250398409015

# Cristiano Ronaldo vs. Courtois

In [None]:
# Row 2 is Cristiano Ronaldo in the player table printed earlier; row 12 is
# the player being compared against him.
# NOTE(review): confirm which player sits at row 12.
dataSetI = new_data.iloc[2:3, :]
dataSetII = new_data.iloc[19234:19235, :]
dataSetIII = new_data.iloc[12:13, :]
# spatial.distance.cosine is documented for 1-D vectors, so flatten the 1-row
# frames at the call site; the frames themselves stay 2-D for later cdist use.
cosine_result_1 = 1 - spatial.distance.cosine(
    dataSetI.to_numpy().ravel(), dataSetIII.to_numpy().ravel()
)
cosine_result_1

0.6271465063593262

# Taking User Input

In [None]:
# Ask for two row positions in `new_data` and report the cosine similarity
# of those two players' scaled attribute vectors.
player_1 = input("Enter the first player index")
player_2 = input("Enter the second player index")

# input() returns text, so convert to integer row positions before indexing.
# A non-numeric entry raises ValueError; an out-of-range one raises IndexError.
index_1 = int(player_1)
index_2 = int(player_2)
vector_1 = new_data.iloc[index_1].to_numpy()
vector_2 = new_data.iloc[index_2].to_numpy()

cosine_result_1 = 1 - spatial.distance.cosine(vector_1, vector_2)
cosine_result_1


In [None]:
# Cosine similarity between the players currently held in dataSetI and
# dataSetIII. The 1-row frames are flattened because
# spatial.distance.cosine is documented for 1-D vectors.
cosine_result_2 = 1 - spatial.distance.cosine(
    dataSetI.to_numpy().ravel(), dataSetIII.to_numpy().ravel()
)
cosine_result_2

0.9499542804257373

In [None]:
# using adjusted cosine similarity
# Work on flat NumPy vectors. Adding the two 1-row DataFrames directly aligns
# them on their row labels; when those labels differ every cell becomes NaN,
# the summed "mean" collapses to zero, and the adjusted score comes out
# identical to the plain cosine similarity.
vec_i = dataSetI.to_numpy().ravel()
vec_ii = dataSetII.to_numpy().ravel()
# Joint mean over every attribute value of both players (replaces the
# hard-coded divisor, which did not match the number of values).
mean_sets = np.concatenate([vec_i, vec_ii]).mean()

adjusted_cosine_result = 1 - spatial.distance.cosine(vec_i - mean_sets, vec_ii - mean_sets)
adjusted_cosine_result

0.8001408605559942

In [None]:
# Adjusted cosine similarity between dataSetI and dataSetIII.
# Work on flat NumPy vectors. Adding the two 1-row DataFrames directly aligns
# them on their row labels; when those labels differ every cell becomes NaN,
# the summed "mean" collapses to zero, and the adjustment has no effect.
vec_i = dataSetI.to_numpy().ravel()
vec_iii = dataSetIII.to_numpy().ravel()
# Joint mean over every attribute value of both players (replaces the
# hard-coded divisor, which did not match the number of values).
mean_sets_2 = np.concatenate([vec_i, vec_iii]).mean()

adjusted_cosine_result_2 = 1 - spatial.distance.cosine(vec_i - mean_sets_2, vec_iii - mean_sets_2)
adjusted_cosine_result_2

0.9499542804257373

In [None]:
# using euclidean and manhattan distances
from scipy.spatial import distance

# Pairwise Euclidean distance between the single rows held in dataSetI and
# dataSetII; with one row on each side the result is a 1x1 array.
euc_result = distance.cdist(dataSetI, dataSetII, metric="euclidean")
euc_result

array([[3.67462802]])

In [None]:
# Manhattan ("cityblock") distance between the rows in dataSetI and
# dataSetII, returned by cdist as a 1x1 array.
manhattan_result = distance.cdist(dataSetI, dataSetII, metric="cityblock")
manhattan_result

array([[21.98724506]])

In [None]:
# Euclidean distance between the rows in dataSetI and dataSetIII, returned
# by cdist as a 1x1 array.
euc_result_2 = distance.cdist(dataSetI, dataSetIII, metric="euclidean")
euc_result_2

array([[1.72329565]])

In [None]:
# Manhattan ("cityblock") distance between the rows in dataSetI and
# dataSetIII, returned by cdist as a 1x1 array.
manhattan_result_2 = distance.cdist(dataSetI, dataSetIII, metric="cityblock")
manhattan_result_2

array([[7.34209323]])

In [None]:
# Canberra distance
# distance.canberra is a vector-to-vector function documented for 1-D input,
# so flatten the 1-row frames rather than passing them as 2-D tables.
canberra_result_1 = distance.canberra(
    dataSetI.to_numpy().ravel(), dataSetII.to_numpy().ravel()
)
canberra_result_1

21.193782671331665

In [None]:
# Canberra distance between dataSetI and dataSetIII; the 1-row frames are
# flattened because distance.canberra is documented for 1-D vectors.
canberra_result_2 = distance.canberra(
    dataSetI.to_numpy().ravel(), dataSetIII.to_numpy().ravel()
)
canberra_result_2

7.349674667515636

In [None]:
# Minkowski distance
# distance.minkowski is documented for 1-D vectors, so flatten the 1-row
# frames. The order is left at its default (p=2), which makes this the same
# quantity as the Euclidean distance computed with cdist.
min_1 = distance.minkowski(
    dataSetI.to_numpy().ravel(), dataSetII.to_numpy().ravel()
)
min_1

3.674628021618717

In [None]:
# Minkowski distance (default order p=2) between dataSetI and dataSetIII;
# the 1-row frames are flattened because distance.minkowski is documented
# for 1-D vectors.
min_2 = distance.minkowski(
    dataSetI.to_numpy().ravel(), dataSetIII.to_numpy().ravel()
)
min_2

1.7232956541076276

In [None]:
# Jensen-Shannon distance
# Pass flat 1-D vectors. jensenshannon normalises its inputs along axis 0, so
# feeding it 1-row frames divided every attribute by itself and returned an
# array of 0/NaN entries (one per column) instead of a single distance
# between the two players.
js_1 = distance.jensenshannon(
    dataSetI.to_numpy().ravel(), dataSetII.to_numpy().ravel()
)
js_1

  p = p / np.sum(p, axis=0)
  q = q / np.sum(q, axis=0)


array([nan,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0., nan,  0., nan, nan])