## SVM Machine Learning Model

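This notebook uses scikit-learn to build SVM models on player skill attributes: first a set of binary classifiers that show which attributes matter for each role, then a multiclass classifier that assigns a player to one of four roles: Attacker, Midfielder, Defender, or Goalkeeper.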

In [9]:
# Libraries
import matplotlib.pyplot as plt
import pandas as pd
import math
import numpy as np
import datetime as DT
from sklearn import svm
from sklearn import datasets, linear_model
from dataloader import loader_mlmodel
from dataloader import playerpos
import bokeh
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, ranges, LabelSet


# Player table produced by the project's dataloader module; the cells below
# read its 'club_pos' column and the skill columns selected further down.
df = loader_mlmodel()

# Binary target for the striker model: wingers and forwards are the positive
# class (1); every other listed position, goalkeeper included, is 0.
mapp = dict.fromkeys(
    ['GK', 'CB', 'LCB', 'RCB', 'LB', 'RB', 'RWB', 'LWB',
     'CM', 'RM', 'LDM', 'LAM', 'RDM', 'RAM', 'RCM', 'LCM', 'CDM', 'CAM', 'LM'],
    0)
mapp.update(dict.fromkeys(['LW', 'RW', 'LF', 'CF', 'RF', 'RS', 'ST', 'LS'], 1))
Ystriker = df['club_pos'].map(mapp)

# Skill attributes used as model inputs, kept both as a DataFrame (for the
# column labels) and as a raw NumPy array (for fitting).
Features = df[[
    'Vision', 'StandingTackle', 'ShortPassing', 'Crossing',
    'Positioning', 'GKHandling', 'BallControl', 'Aggression',
]]
Featuresnp = Features.values

### Feature importance for striker

Here, we use an SVM classifier to separate attacking players (wingers and forwards) from all other players. The fitted weights show which characteristics or traits are essential for a striker.

In [10]:
# Striker-vs-rest labels as a plain NumPy array.
Ynp = Ystriker.values

# A linear kernel is used so that the fitted model exposes one weight per
# feature through coef_, which the following cells plot.
clf = svm.SVC(kernel='linear', decision_function_shape='ovo')
clf.fit(Featuresnp, Ynp)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [11]:
# Take the labels straight from the training frame so every coefficient is
# paired with the column it was fitted on (no hand-copied duplicate list).
names = list(Features.columns)

# For the two-class linear model coef_ holds a single row with one weight per
# feature. Flatten it to 1-D; sizing the reshape by len(names) instead of a
# literal 8 keeps this correct if the feature set changes, and still fails
# loudly if coef_ does not contain exactly one weight per feature.
scoreslist = clf.coef_.reshape(len(names))

In [12]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Column 'x' carries the feature names and column 'y' their SVM weights.
source = ColumnDataSource(dict(x=names, y=scoreslist))

# Horizontal bar chart with one categorical row per feature.
p = figure(y_range=names, plot_width=700, plot_height=400, title='Attribute Scores')
p.yaxis.major_label_orientation = np.pi/4
# Feed the glyph from `source` by column name so the data source built above
# is actually used instead of being dead code.
p.hbar(y='x', right='y', left=0, height=0.5, color="firebrick", source=source)
p.xaxis.axis_label = 'Score'
show(p)

### Feature importance for midfielder

Here, we use an SVM classifier to separate midfielders from non-midfielders. The fitted weights show which characteristics or traits are essential for a midfielder.

In [13]:
# Binary target for the midfielder model: 1 for midfield positions, 0 for the
# goalkeeper, defenders, wingers and forwards.
mapmid = dict.fromkeys(
    ['GK', 'CB', 'LCB', 'RCB', 'LB', 'RB', 'RWB', 'LWB',
     'LW', 'RW', 'LF', 'CF', 'RF', 'RS', 'ST', 'LS'],
    0)
mapmid.update(dict.fromkeys(
    ['CM', 'RM', 'LDM', 'LAM', 'RDM', 'RAM', 'RCM', 'LCM', 'CDM', 'CAM', 'LM'],
    1))
Ymid = df['club_pos'].map(mapmid)
Ynp = Ymid.values

# Refit the shared linear SVC on the midfielder labels; this replaces the
# weights learned in the previous section.
clf.fit(Featuresnp, Ynp)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [14]:
# Display the weights of the midfielder-vs-rest model: a single row with one
# value per feature column, in the column order of the training matrix.
clf.coef_

array([[ 0.11395338,  0.0035788 ,  0.07125402, -0.01567535, -0.01005894,
        -0.04423414, -0.03834362, -0.0236355 ]])

In [15]:
# Flatten the single-row coef_ of the two-class model to 1-D. Sizing the
# reshape by len(names) instead of a literal 8 keeps it tied to the feature
# list and still raises if coef_ does not hold exactly one weight per feature.
scoreslist = clf.coef_.reshape(len(names))

# Render Bokeh output inline in the notebook.
output_notebook()

# Column 'x' carries the feature names and column 'y' their SVM weights.
source = ColumnDataSource(dict(x=names, y=scoreslist))

# Horizontal bar chart with one categorical row per feature.
p = figure(y_range=names, plot_width=700, plot_height=400, title='Attribute Scores')
p.yaxis.major_label_orientation = np.pi/4
# Feed the glyph from `source` by column name so the data source built above
# is actually used instead of being dead code.
p.hbar(y='x', right='y', left=0, height=0.5, color="firebrick", source=source)
p.xaxis.axis_label = 'Score'
show(p)

### Feature importance for defender

Here, we use an SVM classifier to separate defenders from non-defenders. The fitted weights show which characteristics or traits are essential for a defender.

In [16]:
# Binary target for the defender model: 1 for back-line positions, 0 for the
# goalkeeper, midfielders, wingers and forwards.
mapdef = dict.fromkeys(
    ['GK',
     'CM', 'RM', 'LDM', 'LAM', 'RDM', 'RAM', 'RCM', 'LCM', 'CDM', 'CAM', 'LM',
     'LW', 'RW', 'LF', 'CF', 'RF', 'RS', 'ST', 'LS'],
    0)
mapdef.update(dict.fromkeys(
    ['CB', 'LCB', 'RCB', 'LB', 'RB', 'RWB', 'LWB'],
    1))
Ydef = df['club_pos'].map(mapdef)
Ynp = Ydef.values

# Refit the shared linear SVC on the defender labels; this replaces the
# weights learned in the previous section.
clf.fit(Featuresnp, Ynp)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [17]:
# Display the weights of the defender-vs-rest model: a single row with one
# value per feature column, in the column order of the training matrix.
clf.coef_

array([[-0.08853783,  0.16182968, -0.1387647 ,  0.07453459, -0.02576472,
        -0.05460732, -0.01706757, -0.02182825]])

In [18]:
# Flatten the single-row coef_ of the two-class model to 1-D. Sizing the
# reshape by len(names) instead of a literal 8 keeps it tied to the feature
# list and still raises if coef_ does not hold exactly one weight per feature.
scoreslist = clf.coef_.reshape(len(names))

# Render Bokeh output inline in the notebook.
output_notebook()

# Column 'x' carries the feature names and column 'y' their SVM weights.
source = ColumnDataSource(dict(x=names, y=scoreslist))

# Horizontal bar chart with one categorical row per feature.
p = figure(y_range=names, plot_width=700, plot_height=400, title='Attribute Scores')
p.yaxis.major_label_orientation = np.pi/4
# Feed the glyph from `source` by column name so the data source built above
# is actually used instead of being dead code.
p.hbar(y='x', right='y', left=0, height=0.5, color="firebrick", source=source)
p.xaxis.axis_label = 'Score'
show(p)

### Feature importance for goalkeeper

Here, we use an SVM classifier to separate goalkeepers from non-goalkeepers. The fitted weights show which characteristics or traits are essential for a goalkeeper.

In [19]:
# Binary target for the goalkeeper model: only 'GK' is the positive class (1);
# every outfield position maps to 0.
mapgk = dict.fromkeys(
    ['CB', 'LCB', 'RCB', 'LB', 'RB', 'RWB', 'LWB',
     'CM', 'RM', 'LDM', 'LAM', 'RDM', 'RAM', 'RCM', 'LCM', 'CDM', 'CAM', 'LM',
     'LW', 'RW', 'LF', 'CF', 'RF', 'RS', 'ST', 'LS'],
    0)
mapgk['GK'] = 1
Ygk = df['club_pos'].map(mapgk)
Ynp = Ygk.values

# Refit the shared linear SVC on the goalkeeper labels; this replaces the
# weights learned in the previous section.
clf.fit(Featuresnp, Ynp)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [20]:
# Display the weights of the goalkeeper-vs-rest model: a single row with one
# value per feature column, in the column order of the training matrix.
clf.coef_

array([[ 0.01681209, -0.00543345,  0.00375508, -0.00330311, -0.00701135,
         0.04587063, -0.00841177, -0.00326069]])

In [21]:
# Flatten the single-row coef_ of the two-class model to 1-D. Sizing the
# reshape by len(names) instead of a literal 8 keeps it tied to the feature
# list and still raises if coef_ does not hold exactly one weight per feature.
scoreslist = clf.coef_.reshape(len(names))

# Render Bokeh output inline in the notebook.
output_notebook()

# Column 'x' carries the feature names and column 'y' their SVM weights.
source = ColumnDataSource(dict(x=names, y=scoreslist))

# Horizontal bar chart with one categorical row per feature.
p = figure(y_range=names, plot_width=700, plot_height=400, title='Attribute Scores')
p.yaxis.major_label_orientation = np.pi/4
# Feed the glyph from `source` by column name so the data source built above
# is actually used instead of being dead code.
p.hbar(y='x', right='y', left=0, height=0.5, color="firebrick", source=source)
p.xaxis.axis_label = 'Score'
show(p)

### Player classifier

Here, we use an SVM classifier to classify players into Attackers, Midfielders, Defenders, and Goalkeepers based on their skills and traits.

In [22]:
# Four-class target: the index of each group below is its label
# (0 = goalkeeper, 1 = defender, 2 = midfielder, 3 = attacker).
mapclass = {
    position: label
    for label, positions in enumerate([
        ['GK'],
        ['CB', 'LCB', 'RCB', 'LB', 'RB', 'RWB', 'LWB'],
        ['CM', 'RM', 'LDM', 'LAM', 'RDM', 'RAM', 'RCM', 'LCM', 'CDM', 'CAM', 'LM'],
        ['LW', 'RW', 'LF', 'CF', 'RF', 'RS', 'ST', 'LS'],
    ])
    for position in positions
}
Yclass = df['club_pos'].map(mapclass)
Ynp = Yclass.values

# Refit the shared linear SVC, this time on all four role labels at once.
clf.fit(Featuresnp, Ynp)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovo', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [23]:
# Pass the four-class model, the data frame and the feature matrix to the
# project helper playerpos for this player; its verdict is the cell output.
playerpos('T. Kroos',df,clf,Featuresnp)

He should play as a Midfielder


In [24]:
# Pass the four-class model, the data frame and the feature matrix to the
# project helper playerpos for this player; its verdict is the cell output.
playerpos('Cristiano Ronaldo',df,clf,Featuresnp)

He should play as a Striker


In [25]:
# Pass the four-class model, the data frame and the feature matrix to the
# project helper playerpos for this player; its verdict is the cell output.
playerpos('De Gea',df,clf,Featuresnp)

He should play as a Goalkeeper


In [27]:
# Pass the four-class model, the data frame and the feature matrix to the
# project helper playerpos for this player; its verdict is the cell output.
playerpos('M. Hummels',df,clf,Featuresnp)
    

He should play as a Defender
