# Training a Support Vector Classifier
    for Paladins Ranked Match Prediction

### Import libraries

In [1]:
import pandas as pd
from sklearn.metrics import classification_report

### Loading in the data
Let's take a look at our processed data.

In [2]:
data = pd.read_csv("data/processed.csv")
data.head()

Unnamed: 0,match_id,winner,map,region,date_time,sum_lvl1,sum_lvl2,range_lvl1,range_lvl2,sum_tier1,...,c_wr5,c_wr6,c_wr7,c_wr8,c_wr9,c_wr10,sum_cwr1,sum_cwr2,weight_cwr1,weight_cwr2
0,1076464492,1,Ranked Ascension Peak,EU,3/26/2021 11:00:03 PM,478,429,137,105,57,...,0.5769,0.6667,0.4778,0.566,0.4615,0.5,1.7602,2.672,0.5789,0.5082
1,1076464507,0,Ranked Ice Mines,EU,3/26/2021 11:00:07 PM,192,253,38,52,31,...,1.0,0.4286,0.0,0.0,0.5,0.4286,3.4194,1.3572,0.5385,0.3846
2,1076464511,0,Ranked Frozen Guard,EU,3/26/2021 11:00:08 PM,362,1948,104,409,80,...,0.5366,0.5,0.0,0.0,0.3077,0.0,3.5205,0.8077,0.5417,0.483
3,1076464516,1,Ranked Splitstone Quarry,EU,3/26/2021 11:00:09 PM,240,280,78,100,74,...,0.0,0.6364,0.568,0.6,0.55,0.6667,1.828,3.0211,0.6593,0.5674
4,1076464517,1,Ranked Timber Mill,Brazil,3/26/2021 11:00:10 PM,2565,2785,815,515,105,...,0.5402,0.5537,0.4,0.5172,0.4706,0.0,0.8259,1.9415,0.4907,0.5147


#### Checking the balance of the dataset

In [3]:
label = data["winner"]

In [4]:
print(label.value_counts())

0    513
1    511
Name: winner, dtype: int64


### Feature Selection

Looking at the top 10 features correlated with the target

In [5]:
cor = data.corr()
cor_target = abs(cor["winner"])
cor_target.sort_values(ascending=False)[:10]

winner         1.000000
sum_cwr1       0.558434
sum_cwr2       0.550058
c_wr5          0.397344
weight_cwr1    0.372115
sum_awr2       0.359105
sum_ckda2      0.354187
sum_ckda1      0.353741
sum_awr1       0.353495
c_wr10         0.352634
Name: winner, dtype: float64

#### Selecting the features to build the classifier

In [6]:
features = ["sum_awr1", "sum_awr2", "sum_ckda1", "sum_ckda2", "sum_cwr1", "sum_cwr2"]

#### Seperating the features and target into seperate lists

In [7]:
X = data[features].values.tolist()
y = data["winner"].tolist()

### Split data into Train/Test

In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                   test_size=0.2,
                                                   random_state=42)

### Train a classifier without hyperparameters

In [9]:
from sklearn.svm import SVC

baseline = SVC()
baseline.fit(X_train, y_train)

SVC()

#### Take a baseline of the classifiers accuracy

In [10]:
base_preds = baseline.predict(X_test)

print(classification_report(y_test, base_preds))
print(f"Score: {baseline.score(X_test, y_test):.4f}")

              precision    recall  f1-score   support

           0       0.84      0.87      0.85       101
           1       0.87      0.84      0.85       104

    accuracy                           0.85       205
   macro avg       0.85      0.85      0.85       205
weighted avg       0.85      0.85      0.85       205

Score: 0.8537


### Hyper-parameter tuning using GridSearchCV
Search through every combination from the parameter grid (may take some time, depending on local machine)

#### Define the parameter range

In [11]:
param_grid = {
    'C': [0.1, 1, 10, 100, 1000], 
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf', 'linear', 'poly']
             } 

#### Create GridSearch object and fit to training data

In [12]:
from sklearn.model_selection import GridSearchCV
   
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid.fit(X_train[:100], y_train[:100])

Fitting 5 folds for each of 75 candidates, totalling 375 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.600 total time=   0.0s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.500 total time=   0.0s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.500 total time=   0.0s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.500 total time=   0.0s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.500 total time=   0.0s
[CV 1/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.650 total time=   0.0s
[CV 2/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.700 total time=   0.0s
[CV 3/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.700 total time=   0.0s
[CV 4/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.750 total time=   0.0s
[CV 5/5] END .....C=0.1, gamma=1, kernel=linear;, score=0.800 total time=   0.0s
[CV 1/5] END .......C=0.1, gamma=1, kernel=poly;, score=0.850 total time=   2.6s
[CV 2/5] END .......C=0.1, gamma=1, kernel=poly

[CV 1/5] END ........C=10, gamma=1, kernel=poly;, score=0.650 total time=  11.7s
[CV 2/5] END ........C=10, gamma=1, kernel=poly;, score=0.650 total time=  10.7s
[CV 3/5] END ........C=10, gamma=1, kernel=poly;, score=0.600 total time=  29.7s
[CV 4/5] END ........C=10, gamma=1, kernel=poly;, score=0.450 total time=   5.5s
[CV 5/5] END ........C=10, gamma=1, kernel=poly;, score=0.800 total time=  39.0s
[CV 1/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.750 total time=   0.0s
[CV 2/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.550 total time=   0.0s
[CV 3/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.550 total time=   0.0s
[CV 4/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.650 total time=   0.0s
[CV 5/5] END .......C=10, gamma=0.1, kernel=rbf;, score=0.850 total time=   0.0s
[CV 1/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.800 total time=   0.0s
[CV 2/5] END ....C=10, gamma=0.1, kernel=linear;, score=0.650 total time=   0.0s
[CV 3/5] END ....C=10, gamma

[CV 1/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.550 total time=   0.0s
[CV 2/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.600 total time=   0.0s
[CV 3/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.750 total time=   0.0s
[CV 4/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.700 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.001, kernel=poly;, score=0.800 total time=   0.0s
[CV 1/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.650 total time=   0.0s
[CV 2/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.650 total time=   0.0s
[CV 3/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.650 total time=   0.0s
[CV 4/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.750 total time=   0.0s
[CV 5/5] END ...C=100, gamma=0.0001, kernel=rbf;, score=0.800 total time=   0.0s
[CV 1/5] END C=100, gamma=0.0001, kernel=linear;, score=0.750 total time=   0.0s
[CV 2/5] END C=100, gamma=0.0001, kernel=linear;, score=0.750 total time=   0.0s
[CV 3/5] END C=100, gamma=0.

GridSearchCV(estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear', 'poly']},
             verbose=3)

#### Find the optimal parameters

In [13]:
print(grid.best_params_)

{'C': 1000, 'gamma': 1, 'kernel': 'linear'}


### Create another classifier with the new parameters

In [14]:
clf = SVC(C=1000, gamma=1, kernel='linear', probability=True)

#### Fit to training data

In [15]:
clf.fit(X_train, y_train)

SVC(C=1000, gamma=1, kernel='linear', probability=True)

#### Test the accuracy of the new model

In [16]:
preds = clf.predict(X_test)

print(classification_report(y_test, preds))
print(f"Score: {clf.score(X_test, y_test):.4f}")

              precision    recall  f1-score   support

           0       0.84      0.89      0.87       101
           1       0.89      0.84      0.86       104

    accuracy                           0.86       205
   macro avg       0.86      0.86      0.86       205
weighted avg       0.86      0.86      0.86       205

Score: 0.8634


### Export the model

In [17]:
import pickle
filename = "model.sav"
pickle.dump(clf, open(filename, 'wb'))