## UFC Fight-level dataset SVM Notebook
(there is no quick rule as to which kernel performs best in every scenario; testing & learning is key)

Kernel trick reference:
https://towardsdatascience.com/understanding-support-vector-machine-part-2-kernel-trick-mercers-theorem-e1e6848c6c4d

#### Import necessary modules

In [32]:
import os
import sys
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import classification_report, confusion_matrix, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt
%matplotlib inline

#### Helper functions

In [50]:
# rtns best params for C and Gamma; they are the parameters for a nonlinear support vector machine

def svc_parameter_optimization(X, y, nfolds, Cs=None, gammas=None):
    """Grid-search ``C`` and ``gamma`` for an RBF-kernel SVC with cross-validation.

    Parameters
    ----------
    X : array-like
        Training features, passed unchanged to ``GridSearchCV.fit``.
    y : array-like
        Training labels, passed unchanged to ``GridSearchCV.fit``.
    nfolds : int
        Forwarded as the ``cv`` argument of ``GridSearchCV``.
    Cs : sequence of float, optional
        Candidate values for ``C``. Defaults to ``[0.001, 0.01, 0.1, 1, 10]``.
    gammas : sequence of float, optional
        Candidate values for ``gamma``. Defaults to ``[0.001, 0.01, 0.1, 1]``.

    Returns
    -------
    dict
        ``best_params_`` of the fitted grid search, with keys ``'C'`` and ``'gamma'``.
    """
    # C is the regularization parameter: it trades off a low training error
    # against a simpler decision boundary (ability to generalize).
    if Cs is None:
        Cs = [0.001, 0.01, 0.1, 1, 10]
    # gamma is the free parameter (kernel coefficient) of the Gaussian radial basis function.
    if gammas is None:
        gammas = [0.001, 0.01, 0.1, 1]

    param_grid = {'C': Cs, 'gamma': gammas}
    grid_search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=nfolds)  # instantiate grid search
    grid_search.fit(X, y)
    return grid_search.best_params_

#### Set working directory

In [2]:
# Switch the working directory to the project's function folder.
# NOTE(review): absolute, machine-specific path; the relative paths used in later cells
# ('../fun/', '../out/...') and the bare script names passed to open() depend on this cwd.
os.chdir(r'/Users/colella2/Google Drive/Graduate School/MScA/Courses/31008 Data Mining Principles/Final_Project/msca31008/fun/')

In [3]:
pwd

'/Users/colella2/Google Drive/Graduate School/MScA/Courses/31008 Data Mining Principles/Final_Project/msca31008/fun'

In [4]:
os.listdir('../fun/') # confirm items in function folder

['.DS_Store',
 'r.py',
 'tonum.py',
 'hierarchical_imputation.py',
 'fillna-kmeans.py',
 'settings.py',
 'pkl.py',
 'spl.py',
 'untitled',
 'ddict.py']

In [5]:
# test ability to read .py script from function folder
with open('r.py') as _script:  # context manager closes the file handle after reading
    _source = _script.read()
exec(_source)

In [6]:
# read-in all the functions
_fun_dir = '../fun/'
for filename in os.listdir(_fun_dir):
    if not filename.endswith('.py'):
        continue  # skip non-script entries such as .DS_Store
    # Open the file inside the directory that was listed, so the loop does not
    # silently depend on the current working directory being that same folder.
    with open(os.path.join(_fun_dir, filename)) as _script:
        _source = _script.read()
    # Run the helper script in the notebook namespace so its definitions become available.
    exec(_source)

#### Open file of interest

In [7]:
# Switch to the output folder that holds the transformed data set.
# NOTE(review): absolute, machine-specific path.
os.chdir(r'/Users/colella2/Google Drive/Graduate School/MScA/Courses/31008 Data Mining Principles/Final_Project/msca31008/out')

# Load the pickled object into `data` (iterated with .items() further down).
# NOTE: pickle.load can execute arbitrary code; only open files from a trusted source.
with open('d3-fight-level-transform.pkl', 'rb') as f:
    data = pickle.load(f)

In [8]:
# NOTE(review): `load` is not defined in this notebook; presumably it comes from one of the
# helper scripts exec'd above and binds X and y as globals — TODO confirm.
# The path resolves to the same pickle file that was already read into `data` in the previous cell.
load( '../out/d3-fight-level-transform.pkl' )
print( X.shape )

(4368, 165)


#### Examine key-value pairs in dict

In [9]:
# Print every key of the loaded object together with its full value.
# NOTE(review): this dumps whole arrays/series into the output; printing only the
# type/shape per key would keep the notebook readable.
for key, value in data.items():
  print(key, value)

X [[ 2.91491784e-01 -9.60590063e-02  2.50234064e-01 ... -7.94744912e-03
   3.10245107e-03 -1.87224280e-03]
 [ 2.70915901e-01 -1.24214845e-01  2.32611941e-01 ...  7.75837707e-03
   1.68645662e-03 -2.72968561e-04]
 [-9.60671224e-03 -1.16270731e-02 -9.73710726e-03 ... -2.94963859e-01
   6.74519642e-04  2.24984217e-04]
 ...
 [-1.24919035e-02 -1.80251541e-01 -1.85625621e-01 ... -1.63732606e-03
   2.16095113e-03 -1.86365288e-03]
 [-1.20995932e-02 -1.73834821e-01 -1.79162807e-01 ... -1.52725608e-04
   7.74126643e-04 -3.98965666e-04]
 [-1.29588989e-02 -1.83460266e-01 -1.89608127e-01 ... -2.01230682e-03
   1.44181940e-03 -1.33855060e-03]]
y 0       1
1       1
2       1
3       0
4       0
5       1
6       1
7       0
8       0
9       1
10      0
11      1
12      0
13      0
14      1
15      0
16      0
17      0
18      0
19      0
20      1
21      0
22      1
23      1
24      0
       ..
4343    1
4344    1
4345    1
4346    1
4347    1
4348    0
4349    1
4350    1
4351    1
4352    1


In [10]:
print(X.shape) # view feature shape; 4368 rows, 165 columns

(4368, 165)


In [11]:
print(y.shape) # view target (label) shape; 4368 entries, 1-D

(4368,)


#### Train-test split

In [12]:
# hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=718,
)

#### Fit model (linear kernel)
(find decision boundary for linearly separable data)

In [13]:
svclassifier_linear = SVC(kernel='linear')
svclassifier_linear.fit(X_train, y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='linear', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [14]:
# accuracy against train data
print(classification_report(y_train,svclassifier_linear.predict(X_train)))

              precision    recall  f1-score   support

           0       0.61      0.16      0.26       981
           1       0.71      0.95      0.81      2076

    accuracy                           0.70      3057
   macro avg       0.66      0.56      0.53      3057
weighted avg       0.68      0.70      0.63      3057



In [15]:
# accuracy against test data
print(classification_report(y_test, svclassifier_linear.predict(X_test)))

              precision    recall  f1-score   support

           0       0.49      0.10      0.16       459
           1       0.66      0.94      0.78       852

    accuracy                           0.65      1311
   macro avg       0.57      0.52      0.47      1311
weighted avg       0.60      0.65      0.56      1311



In [16]:
# store predicted values on X_test & print confusion matrix
y_pred_linear = svclassifier_linear.predict(X_test)
print(confusion_matrix(y_test,y_pred_linear))

[[ 45 414]
 [ 47 805]]


### Begin section for non-linear investigation

#### Fit model (poly kernel)

In [17]:
# Degree-8 polynomial kernel; C and gamma are left at the library defaults.
# NOTE(review): the fitted model predicts class 1 for every test row (see the confusion
# matrix below) — worth retrying with a lower degree and/or tuned gamma/C.
svclassifier_poly = SVC(kernel='poly', degree=8)
svclassifier_poly.fit(X_train, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=8, gamma='auto_deprecated',
    kernel='poly', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [18]:
# accuracy against train data
print(classification_report(y_train,svclassifier_poly.predict(X_train)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       981
           1       0.68      1.00      0.81      2076

    accuracy                           0.68      3057
   macro avg       0.34      0.50      0.40      3057
weighted avg       0.46      0.68      0.55      3057



  'precision', 'predicted', average, warn_for)


In [19]:
# accuracy against test data
print(classification_report(y_test, svclassifier_poly.predict(X_test)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       459
           1       0.65      1.00      0.79       852

    accuracy                           0.65      1311
   macro avg       0.32      0.50      0.39      1311
weighted avg       0.42      0.65      0.51      1311



  'precision', 'predicted', average, warn_for)


In [20]:
# store predicted values on X_test & print confusion matrix
y_pred_poly = svclassifier_poly.predict(X_test)
print(confusion_matrix(y_test,y_pred_poly))

[[  0 459]
 [  0 852]]


#### Fit model (Gaussian kernel)
(the Gaussian kernel is a special case of a radial basis function (RBF) kernel; it is the kernel selected by `kernel='rbf'`)

In [21]:
# RBF kernel with C and gamma left at the library defaults (untuned baseline).
# NOTE(review): the fitted model predicts class 1 for every test row (see the confusion
# matrix below); the tuned version further down is the meaningful comparison.
svclassifier_gaus = SVC(kernel='rbf')
svclassifier_gaus.fit(X_train, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='rbf', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [22]:
# accuracy against train data
print(classification_report(y_train,svclassifier_gaus.predict(X_train)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       981
           1       0.68      1.00      0.81      2076

    accuracy                           0.68      3057
   macro avg       0.34      0.50      0.40      3057
weighted avg       0.46      0.68      0.55      3057



  'precision', 'predicted', average, warn_for)


In [23]:
# accuracy against test data
print(classification_report(y_test, svclassifier_gaus.predict(X_test)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       459
           1       0.65      1.00      0.79       852

    accuracy                           0.65      1311
   macro avg       0.32      0.50      0.39      1311
weighted avg       0.42      0.65      0.51      1311



  'precision', 'predicted', average, warn_for)


In [24]:
# store predicted values on X_test & print confusion matrix
y_pred_gaus = svclassifier_gaus.predict(X_test)
print(confusion_matrix(y_test,y_pred_gaus))

[[  0 459]
 [  0 852]]


#### After first running simply (as was done above), commence optimization...

In [36]:
# commence grid search (5-fold cross-validation) for best parameters on training set
# NOTE(review): the selected gamma (1) is the largest value in the search grid, so the
# optimum may lie outside it — consider extending the gamma range.
svc_parameter_optimization(X_train, y_train, 5)

{'C': 1, 'gamma': 1}

In [37]:
# instantiate with optimal parameters
# (C and gamma are copied by hand from the grid-search output; update them if the search is re-run)
svclassifier_gaus_optim = SVC(kernel='rbf', C = 1, gamma = 1)
svclassifier_gaus_optim.fit(X_train, y_train)

SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=1, kernel='rbf', max_iter=-1,
    probability=False, random_state=None, shrinking=True, tol=0.001,
    verbose=False)

In [38]:
# view optimized results against the training data
print(classification_report(y_train,svclassifier_gaus_optim.predict(X_train)))

              precision    recall  f1-score   support

           0       0.91      0.48      0.63       981
           1       0.80      0.98      0.88      2076

    accuracy                           0.82      3057
   macro avg       0.86      0.73      0.75      3057
weighted avg       0.84      0.82      0.80      3057



In [39]:
# check accuracy against test data
print(classification_report(y_test, svclassifier_gaus_optim.predict(X_test)))

              precision    recall  f1-score   support

           0       0.47      0.13      0.20       459
           1       0.66      0.92      0.77       852

    accuracy                           0.64      1311
   macro avg       0.57      0.53      0.49      1311
weighted avg       0.60      0.64      0.57      1311



In [40]:
# store predicted values on X_test & print confusion matrix
y_pred_gaus_optim = svclassifier_gaus_optim.predict(X_test)
print(confusion_matrix(y_test,y_pred_gaus_optim))

[[ 60 399]
 [ 67 785]]


#### Fit model (Sigmoid kernel)
(suitable for binary classification problems, where the classifier returns 0 or 1; the sigmoid kernel is borrowed from the activation function used in neural networks)

In [25]:
# Sigmoid kernel with C and gamma left at the library defaults.
# NOTE(review): the fitted model predicts class 1 for every test row (see the confusion
# matrix below), so these settings were not tuned for this data.
svclassifier_sig = SVC(kernel='sigmoid')
svclassifier_sig.fit(X_train, y_train)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
    kernel='sigmoid', max_iter=-1, probability=False, random_state=None,
    shrinking=True, tol=0.001, verbose=False)

In [26]:
# accuracy against train data
print(classification_report(y_train,svclassifier_sig.predict(X_train)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       981
           1       0.68      1.00      0.81      2076

    accuracy                           0.68      3057
   macro avg       0.34      0.50      0.40      3057
weighted avg       0.46      0.68      0.55      3057



  'precision', 'predicted', average, warn_for)


In [27]:
# accuracy against test data
print(classification_report(y_test, svclassifier_sig.predict(X_test)))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00       459
           1       0.65      1.00      0.79       852

    accuracy                           0.65      1311
   macro avg       0.32      0.50      0.39      1311
weighted avg       0.42      0.65      0.51      1311



  'precision', 'predicted', average, warn_for)


In [28]:
# store predicted values on X_test & print confusion matrix
y_pred_sig = svclassifier_sig.predict(X_test)
print(confusion_matrix(y_test,y_pred_sig))

[[  0 459]
 [  0 852]]


#### Conclusion
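Linear SVM performs similarly to the optimized Gaussian RBF SVM on the test set (accuracy 0.65 vs. 0.64). Neither improves on the 0.65 accuracy obtained by always predicting class 1, which is what the untuned poly, RBF and sigmoid models do.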