In [7]:
import os
import numpy as np
import pickle
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

with open('dataset/preprocessed_data.pickle', 'rb') as f:
    data = pickle.load(f)
    
train_data = data['train_dataset']
test_data = data['test_dataset']

train_labels = data['train_labels']
test_labels = data['test_labels']

input_width = train_data.shape[1]
input_height = train_data.shape[2]

train_data = train_data.reshape((-1, input_width * input_height)).astype(np.float32)
test_data = test_data.reshape((-1, input_width * input_height)).astype(np.float32)

print("Running PCA")

# run pca to reduce vector size to 900
pca = PCA(n_components=900)

pca.fit(train_data)

train_data = pca.transform(train_data)

test_data = pca.transform(test_data)

print("Shape of training set after pca:")
print(train_data.shape)

print("Shape of test set after pca:")
print(test_data.shape)

# computes accuracy given the predictions and real labels
def accuracy(predictions, labels):
    batch_size = predictions.shape[0]
    sum = np.sum(predictions == labels)
    acc = (100.0 * sum) / batch_size
    return acc

print("Training SVM")

# hyperparameter tuning using grid search
param_grid = {'C': [0.01, 0.1, 1, 10, 100],
              'kernel': ['linear', 'poly', 'rbf'],
              'degree': [2, 3, 4],
              'gamma': ['scale', 'auto']}

clf = GridSearchCV(SVC(), param_grid, cv=5)

clf.fit(train_data, train_labels)

print(f"Best parameters: {clf.best_params_}")

# getting predictions of training set
train_predictions = clf.predict(train_data)

print(f"Train Accuracy: {accuracy(train_predictions, train_labels):.2f}%")

# getting predictions of test set
test_predictions = clf.predict(test_data)

print(f"Test Accuracy: {accuracy(test_predictions, test_labels):.2f}%")

if os.path.exists('models/svm_model.pkl'):
    print('File already exists.')
else:
    with open('models/svm_model.pkl', 'wb') as f:
        pickle.dump(clf, f)

Running PCA
Shape of training set after pca:
(900, 900)
Shape of test set after pca:
(100, 900)
Training SVM
Best parameters: {'C': 0.01, 'degree': 2, 'gamma': 'scale', 'kernel': 'linear'}
Train Accuracy: 99.89%
Test Accuracy: 70.00%
