In [1]:
import os
import numpy as np
import pandas as pd
import csv
from sklearn import svm
from scipy.stats import uniform, norm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the training data from the CSV file.
# For every data row, columns 2..6 are parsed as the five integer features and
# column 8 is parsed as the integer class label.
# Rows are collected in plain Python lists and converted to arrays once at the
# end; calling np.append inside the loop would copy the whole array per row.
feature_rows = []
label_values = []

# newline="" is what the csv module expects so quoted fields that contain
# line breaks are parsed correctly.
with open("Toy Dataset.csv", "r", newline="") as f:
    d_reader = csv.reader(f, delimiter=",", quotechar="\"")
    next(d_reader, None)  # skip the header row (no-op on an empty file)

    for line in d_reader:
        features = [int(value) for value in line[2:7]]
        if len(features) != 5:
            # Keep failing loudly on short rows, as the original reshape did.
            raise ValueError(
                "expected 5 feature columns, got {}: {!r}".format(len(features), line)
            )
        feature_rows.append(features)
        label_values.append(int(line[8]))

# Keep the float64 dtype and the (n, 5) / (n,) shapes, including the empty case.
x_train = np.array(feature_rows, dtype=float).reshape((-1, 5))
y_train = np.array(label_values, dtype=float)
print(x_train.shape)
print(y_train.shape)

(419, 5)
(419,)


In [3]:
# Seed for the oversampler.
rand_seed = 3454132

# Oversample with SMOTE using the "not majority" strategy.
# NOTE(review): x_train / y_train are overwritten in place, so re-running this
# cell resamples the already-resampled arrays; later cells then cross-validate
# on data that includes synthetic samples.
oversampler = SMOTE(
    random_state=rand_seed,
    sampling_strategy="not majority",
)
x_train, y_train = oversampler.fit_resample(x_train, y_train)

In [None]:
# SVM (SVC), randomized hyper-parameter search
# Samples `random_search_iterations` candidates from `parameters` and scores
# each with `k_folds`-fold cross-validation.
k_folds = 10
random_search_iterations = 200
parameters = {
    'C': np.logspace(-3, 2, 6),
    'gamma': np.logspace(-3, 2, 6),
    'kernel': ['linear', 'rbf', 'sigmoid', 'poly'],
    'shrinking': [True, False],
    'probability': [True, False]
}

svmc = svm.SVC()
# random_state makes the sampled candidates (and so the reported best score and
# parameters) reproducible between runs; it reuses the notebook seed.
random_search_svm = RandomizedSearchCV(
    svmc,
    parameters,
    n_iter=random_search_iterations,
    cv=k_folds,
    n_jobs=-1,
    random_state=rand_seed,
)
random_search_svm.fit(x_train, y_train)

# Cross-validated score and hyper-parameters of the best candidate.
print("Best Score: {:.4f}".format(random_search_svm.best_score_))
for k, v in random_search_svm.best_params_.items():
    print("{} => {}".format(k, v))
print("Splits: {}".format(random_search_svm.n_splits_))

# NOTE(review): this accuracy is measured on the same data the search was
# fitted on, so it is a training (resubstitution) figure, not a held-out one.
y_out = random_search_svm.predict(x_train)
print("Accuracy: {:.4f}%".format(np.mean(y_out == y_train) * 100.0))

In [None]:
# Confusion matrix of the tuned SVC on the training data, normalised with
# normalize='true' and drawn with the Blues colour map.
# NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0 and
# removed in 1.2 (ConfusionMatrixDisplay.from_estimator replaces it) — confirm
# the pinned scikit-learn version. The label order is assumed to match the
# sorted class ids — TODO confirm.
disp = plot_confusion_matrix(
    random_search_svm,
    x_train,
    y_train,
    normalize='true',
    cmap=plt.cm.Blues,
    display_labels=["Calm","Cheerful","Bravery","Fearful","Sadness","Love"],
)

In [None]:
# LinearSVC, randomized hyper-parameter search
# Samples `random_search_iterations` candidates from `parameters` and scores
# each with `k_folds`-fold cross-validation.
k_folds = 10
random_search_iterations = 200
# NOTE(review): not every penalty / loss / dual combination is supported by
# LinearSVC; such candidates presumably fail to fit and are scored NaN under
# the default error_score (warnings are silenced at the top of the notebook)
# — confirm.
parameters = {
    'penalty': ['l1', 'l2'],
    'loss': ['hinge', 'squared_hinge'],
    'dual': [True, False],
    'C': np.logspace(-3, 2, 6),
    'multi_class': ['ovr', 'crammer_singer'],
    'max_iter': [1000, 2500, 3000]
}

# Seed both the estimator and the search with the notebook seed so the sampled
# candidates and the fitted models are reproducible between runs.
linearsvm = svm.LinearSVC(random_state=rand_seed)
random_search_linear = RandomizedSearchCV(
    linearsvm,
    parameters,
    n_iter=random_search_iterations,
    cv=k_folds,
    n_jobs=-1,
    random_state=rand_seed,
)
random_search_linear.fit(x_train, y_train)

# Cross-validated score and hyper-parameters of the best candidate.
print("Best Score: {:.4f}".format(random_search_linear.best_score_))
for k, v in random_search_linear.best_params_.items():
    print("{} => {}".format(k, v))
print("Splits: {}".format(random_search_linear.n_splits_))

# NOTE(review): this accuracy is measured on the same data the search was
# fitted on, so it is a training (resubstitution) figure, not a held-out one.
y_out = random_search_linear.predict(x_train)
print("Accuracy: {:.4f}%".format(np.mean(y_out == y_train) * 100.0))

In [None]:
# Confusion matrix of the tuned LinearSVC on the training data, normalised
# with normalize='true' and drawn with the Blues colour map.
# NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0 and
# removed in 1.2 (ConfusionMatrixDisplay.from_estimator replaces it) — confirm
# the pinned scikit-learn version. The label order is assumed to match the
# sorted class ids — TODO confirm.
disp = plot_confusion_matrix(
    random_search_linear,
    x_train,
    y_train,
    normalize='true',
    cmap=plt.cm.Blues,
    display_labels=["Calm","Cheerful","Bravery","Fearful","Sadness","Love"],
)