In [1]:
import numpy as np
import pandas as pd

# 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ml algoritems
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neighbors import NearestNeighbors
from sklearn import tree
from sklearn.pipeline import Pipeline
from sklearn.neighbors import NeighborhoodComponentsAnalysis

# score tests 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# 
from pandas.plotting import radviz
import math
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns
from matplotlib.colors import ListedColormap
%matplotlib inline
from sklearn.metrics import PrecisionRecallDisplay

In [2]:
cols = list(range(1,17))
df = pd.read_csv(r'Players_2_Normalized.csv', usecols=cols)
df

Unnamed: 0,Position,Pace,Shooting,Passing,Dribbling,Defending,Physicality,Agility,Balance,Marking,Positioning,Sprint_Speed,Vision,Finishing,Player_Height,attack_contribution
0,1,0.167401,0.200441,0.178414,0.189427,0.074890,0.189427,0.866667,0.202899,0.188235,0.8875,0.714286,0.8375,0.953488,0.818182,0.893965
1,3,0.167038,0.160356,0.198218,0.202673,0.131403,0.140312,0.826667,0.855072,0.564706,0.9000,0.685714,0.9625,0.744186,0.250000,0.746451
2,2,0.204598,0.179310,0.195402,0.211494,0.066667,0.142529,0.946667,0.942029,0.188235,0.8500,0.857143,0.9000,0.813953,0.250000,0.937982
3,3,0.130621,0.173448,0.182013,0.175589,0.167024,0.171306,0.706667,0.695652,0.705882,0.8375,0.528571,0.8750,0.779070,0.545455,0.634857
4,1,0.182898,0.204276,0.152019,0.192399,0.095012,0.173397,0.853333,0.695652,0.188235,0.9625,0.685714,0.6500,0.953488,0.409091,0.882435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4017,2,0.194581,0.192118,0.177340,0.184729,0.078818,0.172414,0.760000,0.666667,0.423529,0.7875,0.785714,0.7125,0.790698,0.409091,0.791769
4018,3,0.164352,0.155093,0.162037,0.173611,0.162037,0.182870,0.746667,0.840580,0.729412,0.6750,0.628571,0.6875,0.651163,0.318182,0.639426
4019,2,0.206633,0.168367,0.173469,0.209184,0.099490,0.142857,0.866667,0.811594,0.364706,0.6625,0.671429,0.7000,0.709302,0.318182,0.838967
4020,3,0.198630,0.132420,0.150685,0.164384,0.168950,0.184932,0.800000,0.782609,0.800000,0.6625,0.871429,0.6750,0.616279,0.545455,0.629249


### my models:
 * SVC
 * Decision_Tree
 * Knn


In [3]:
def svc(X_train, y_train, X_test, y_test):
    model = SVC(C=10, kernel='linear')
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return y_pred

In [4]:
def decision_tree(X_train, y_train, X_test, y_test):
    model = tree.DecisionTreeClassifier()
    model.fit(X_train, y_train)
    prediction = model.predict(X_test)
    return prediction

In [5]:
def knn(X_train, y_train, X_test, y_test):
    sc_X = StandardScaler()
    X_train = sc_X.fit_transform(X_train)
    X_test = sc_X.transform(X_test)
    
    classifier = KNeighborsClassifier(n_neighbors=11, n_jobs=-1)
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)
    return y_pred

# data split

In [6]:
X = df.iloc[:,1:17]
y = df.iloc[:,0]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

## the scors 👨🏽‍🏫

 * SVC Model score: 

In [7]:
y_pred= svc(X_train, y_train, X_test, y_test)
print("%.6f" % precision_score(y_test, y_pred, average='macro'))
print("%.6f" % recall_score(y_test, y_pred, average='macro'))
print("%.6f" % f1_score(y_test, y_pred, average='macro'))
print("%.6f" % accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)

0.844604
0.847050
0.845581
0.843478


array([[127,  21,   0,   0,   0],
       [ 24, 146,  10,   8,   0],
       [  0,   9, 134,  13,   7],
       [  0,   1,  11, 136,   8],
       [  0,   0,   8,   6, 136]], dtype=int64)

 * Decision Tree Model score: 

In [8]:
y_pred = decision_tree(X_train, y_train, X_test, y_test)
print("%.6f" % precision_score(y_test, y_pred, average='macro'))
print("%.6f" % recall_score(y_test, y_pred, average='macro'))
print("%.6f" % f1_score(y_test, y_pred, average='macro'))
print("%.6f" % accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)

0.812593
0.814977
0.812851
0.809938


array([[128,  20,   0,   0,   0],
       [ 33, 134,  13,   7,   1],
       [  1,  11, 132,  14,   5],
       [  0,   4,  18, 127,   7],
       [  0,   0,   7,  12, 131]], dtype=int64)

 * knn Model score:

In [9]:
y_pred = knn(X_train, y_train, X_test, y_test)
print("%.6f" % precision_score(y_test, y_pred, average='macro'))
print("%.6f" % recall_score(y_test, y_pred, average='macro'))
print("%.6f" % f1_score(y_test, y_pred, average='macro'))
print("%.6f" % accuracy_score(y_test, y_pred))
confusion_matrix(y_test, y_pred)


0.867752
0.868484
0.867690
0.865839


array([[127,  21,   0,   0,   0],
       [ 14, 152,  13,   9,   0],
       [  0,   5, 141,  11,   6],
       [  0,   0,   7, 140,   9],
       [  0,   0,   8,   5, 137]], dtype=int64)

In [10]:
y_pred_svc= svc(X_train, y_train, X_test, y_test)
y_pred_dt = decision_tree(X_train, y_train, X_test, y_test)
y_pred_knn = knn(X_train, y_train, X_test, y_test)

name = ["SVC", "Decision Tree", "KNN"]

recall = [recall_score(y_test, y_pred_svc, average='macro'), recall_score(y_test, y_pred_dt, average='macro'), recall_score(y_test, y_pred_knn, average='macro')]

f1 = [f1_score(y_test, y_pred_svc, average='macro'), f1_score(y_test, y_pred_dt, average='macro'), f1_score(y_test, y_pred_knn, average='macro')]

accuracy_s = [accuracy_score(y_test, y_pred_svc), accuracy_score(y_test, y_pred_dt), accuracy_score(y_test, y_pred_knn)]

precision = [precision_score(y_test, y_pred_svc, average='macro'), precision_score(y_test, y_pred_dt, average='macro'), precision_score(y_test, y_pred_knn, average='macro')]

accuracy_df = pd.DataFrame({"model":name,"recall":recall, "f1":f1, "accuracy_s":accuracy_s, "precision":precision})
accuracy_df

Unnamed: 0,model,recall,f1,accuracy_s,precision
0,SVC,0.84705,0.845581,0.843478,0.844604
1,Decision Tree,0.800972,0.79973,0.796273,0.799721
2,KNN,0.868484,0.86769,0.865839,0.867752


### we can see that KNN gave us the best results 86% 