In [11]:
from sklearn.model_selection import KFold
import os
import glob

import numpy as np
import pandas as pd


import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import svm, datasets
import sklearn.metrics

import utils

from skl_dataset import SKLSwingDataset, oversample_minority

from sklearn.metrics import confusion_matrix
#import seaborn as sn

def calcAccuracy(predictions, truth):
    """Return the fraction of predictions that equal the ground-truth labels.

    Parameters
    ----------
    predictions : sequence
        Predicted labels. Its length determines how many positions are compared.
    truth : sequence
        Ground-truth labels, indexed in parallel with ``predictions``.

    Returns
    -------
    float
        Number of matching positions divided by ``len(predictions)``;
        ``0.0`` when ``predictions`` is empty.

    Raises
    ------
    IndexError
        If ``truth`` has fewer elements than ``predictions``.
    """
    numPreds = len(predictions)
    if numPreds == 0:
        # Nothing to score: report 0.0 instead of dividing by zero.
        return 0.0

    # Count 1 per hit (rather than summing the comparison results) so the
    # tally is a plain int regardless of the element type being compared.
    hits = sum(1 for i in range(numPreds) if predictions[i] == truth[i])
    return hits / numPreds


# Environment flag set before any model is fitted.
# NOTE(review): presumably the usual workaround for the "duplicate OpenMP
# runtime" abort -- confirm it is still required on the target machine.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

path = "data/"
# Human-readable shot-type names; not referenced again in this cell.
shot_types = ["Pull-hook", "Hook", "Pull", "Fade", "Straight", "Draw", "Push", "Slice" , "Push-slice"]
use_partitioning  = True

# When partitioning is on, the fold loop integer-divides each class index by
# this value to merge neighbouring classes into coarser groups.
# NOTE(review): `shot_types` lists 9 names while the non-partitioned value is
# 10, and dividing indices 0..8 by 4 yields groups of size 4/4/1 -- confirm
# that 4 (rather than 3) is the intended group width.
n_shot_types = 4 if use_partitioning else 10

X_data, y_data = utils.load_data(path)

# NOTE(review): oversampling runs before the folds are split. If
# `oversample_minority` duplicates samples, copies of one sample may land in
# both the training and the test part of a fold -- verify against its source.
use_oversampling = True
if use_oversampling:
    X_data, y_data = oversample_minority(X_data, y_data)

k_fold = 10

n = X_data.shape[0]
# KFold without `shuffle=True` cuts contiguous index ranges.
# NOTE(review): confirm the sample order is not grouped by class.
kf = KFold(n_splits=k_fold)

total_acc = 0
total_MSE = 0

# Pooled over all folds: predicted labels and the matching true labels.
all_test_shot_types = []
all_true_shot_types = []



# Per-fold accuracies, collected so the cross-validated mean can be reported
# directly instead of being summed by hand from the printed lines.
fold_accuracies = []

for k_i, (train_index, test_index) in enumerate(kf.split(X_data)):
    X_train, X_test = X_data[train_index], X_data[test_index]
    y_train, y_test = y_data[train_index], y_data[test_index]

    # The training dataset provides the statistics (mean/std) that are handed
    # to the test dataset of the same fold.
    train_set = SKLSwingDataset(X_train, y_train, augment=True, oversample = True)
    test_set = SKLSwingDataset(X_test, y_test, mean=train_set.mean, std=train_set.std, y_mean=train_set.y_dist_mean, y_std=train_set.y_dist_std)

    # NOTE(review): the classifier is fitted on the fold's own X_train/y_train
    # arrays, while the test features/labels are read back from `test_set`.
    # If SKLSwingDataset normalises or augments into its own copies, train and
    # test features are on different scales -- confirm whether
    # `train_set.X_data` / `train_set.y_data` were meant to be used here.

    # Training labels: drop the last column of every label row, then take the
    # arg-max of the remaining columns as the class index.
    train_classes = [row[:-1] for row in y_train]
    y_train = np.argmax(train_classes, axis=1)
    if use_partitioning:
        # Merge neighbouring classes into coarser groups (vectorised).
        y_train = y_train // n_shot_types

    # Flatten each training sample to a single feature vector.
    n, nx, ny = X_train.shape
    X_train = X_train.reshape((n, nx * ny))

    # Test features/labels as exposed by the test dataset.
    X_test = test_set.X_data
    y_test = test_set.y_data

    # Flatten each test sample to a single feature vector.
    n, nx, ny = X_test.shape
    X_test = X_test.reshape((n, nx * ny))

    # Test labels: same column-drop + arg-max (+ optional grouping) as above.
    test_classes = [row[:-1] for row in y_test]
    y_test = np.argmax(test_classes, axis=1)
    if use_partitioning:
        y_test = y_test // n_shot_types

    # One-vs-one SVM with RBF kernel.
    # NOTE(review): only `predict` is used in the visible code; if no later
    # cell calls `predict_proba`, `probability=True` can be dropped to avoid
    # the extra internal cross-validation during fit.
    rbfsvm = svm.SVC(kernel='rbf', decision_function_shape='ovo',
                     gamma='auto', C=1, probability=True).fit(X_train, y_train)

    predictions = rbfsvm.predict(X_test)
    accuracy = calcAccuracy(predictions, y_test)
    fold_accuracies.append(accuracy)

    all_test_shot_types.extend(predictions)
    all_true_shot_types.extend(y_test)

    print("Fold %d: Accuracy is %f (%f%%)" % (k_i + 1, accuracy, accuracy * 100))

# Aggregate once all folds are done.
total_acc = sum(fold_accuracies)
print("Mean accuracy over %d folds: %f" % (len(fold_accuracies), total_acc / len(fold_accuracies)))

Fold 1: Accuracy is 0.913043 (91.304348%)
Fold 2: Accuracy is 0.826087 (82.608696%)
Fold 3: Accuracy is 0.956522 (95.652174%)
Fold 4: Accuracy is 0.869565 (86.956522%)
Fold 5: Accuracy is 0.826087 (82.608696%)
Fold 6: Accuracy is 0.863636 (86.363636%)
Fold 7: Accuracy is 0.909091 (90.909091%)
Fold 8: Accuracy is 0.863636 (86.363636%)
Fold 9: Accuracy is 0.727273 (72.727273%)
Fold 10: Accuracy is 0.909091 (90.909091%)


In [14]:
# Confusion matrix pooled over every fold. `all_test_shot_types` holds the
# model's predictions (despite its name) and `all_true_shot_types` the labels.
# A seaborn heatmap of `cm` was sketched here earlier but is disabled, since
# the seaborn import at the top of the notebook is commented out.
cm = confusion_matrix(y_true=all_true_shot_types, y_pred=all_test_shot_types)
cm

array([[ 91,   9,   0],
       [  0, 100,   0],
       [  0,  21,   4]])

In [16]:
# Sum of the ten per-fold accuracies, copied by hand from the fold printout
# above; dividing the displayed total by 10 gives the mean fold accuracy.
0.913043 + 0.826087 + 0.956522 + 0.869565 + 0.826087 + 0.863636 + 0.909091 + 0.863636 + 0.727273 + 0.909091 

8.664031