In [1]:
from sklearn.model_selection import train_test_split

from SupportVectorMachine import SVM
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from OneVsRestClassifier import OneVsRestClassifier
from sklearn.datasets import load_iris

In [2]:
# Notebook-wide plotting defaults: 10x6 figures, seaborn's dark grid style,
# and the 'deep' colour palette.
plt.rcParams.update({'figure.figsize': (10, 6)})
sns.set_style('darkgrid')
sns.set_palette(
    sns.color_palette('deep')
)

# Loading the dataset

In [3]:
# Load the iris bundle and place its feature matrix in a DataFrame whose
# columns are named after the features.
iris = load_iris()
df = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])
# Add the targets as a categorical 'class' column, then replace every code
# with the entry of iris['target_names'] it indexes.
df['class'] = pd.Categorical(iris['target'])
df['class'] = df['class'].apply(lambda c: iris['target_names'][c])

In [4]:
# Features: every column except the label. Labels: the 'class' column.
X = np.array(df.drop(columns=['class']))
y = np.array(df['class'])

# Generating a pair-plot

In [5]:
# Disabled cell: uncomment the two lines below to draw a pair plot of `df`
# coloured by 'class' and save it to 'pair-plot.svg'.
# pp = sns.pairplot(df, hue='class')
# pp.fig.savefig('pair-plot.svg')

# Splitting into train, validation, and test sets

In [6]:
# Target share of the full dataset for each partition.
train_ratio, validation_ratio, test_ratio = 0.75, 0.15, 0.10

# First cut: hold back everything that is not training data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=1-train_ratio,
    random_state=1,
)
# Second cut: divide the held-back samples between validation and test,
# giving test its share relative to the combined validation + test portion.
X_val, X_test, y_val, y_test = train_test_split(
    X_test, y_test,
    test_size=test_ratio / (test_ratio + validation_ratio),
    random_state=1,
)

X_train.shape

(112, 4)

# Defining the classifier and training

In [7]:
# Build the one-vs-rest wrapper around the SVM with its default settings.
# The second argument is taken from the dataset (len(iris['target_names']))
# instead of a literal 3, the same expression the later RBF cell passes.
linear_svm = OneVsRestClassifier(SVM, len(iris['target_names']),
                                 class_labels=iris['target_names'])

In [8]:
# Fit the classifier on the training split only; validation and test data
# are not passed in here.
linear_svm.fit(X_train, y_train)

# Defining an accuracy measure to apply to the validation set

In [9]:
def accuracy(y_pred, y_true):
    """Return the fraction of predictions that match the true labels.

    Both arguments are converted to NumPy arrays before comparing, so the
    comparison is element-wise even when two plain Python lists are passed
    (``list == list`` would otherwise collapse to a single bool and give a
    wrong score).

    Parameters
    ----------
    y_pred : array-like
        Predicted labels.
    y_true : array-like
        Ground-truth labels, same length as ``y_pred``.

    Returns
    -------
    numpy.float64
        Number of matching positions divided by ``len(y_pred)``.

    Raises
    ------
    ValueError
        If the inputs differ in length or are empty.
    """
    y_pred = np.asarray(y_pred)
    y_true = np.asarray(y_true)
    if len(y_pred) != len(y_true):
        raise ValueError(
            f"y_pred and y_true must have the same length, "
            f"got {len(y_pred)} and {len(y_true)}"
        )
    if len(y_pred) == 0:
        # 0 / 0 would otherwise yield nan/inf with only a runtime warning.
        raise ValueError("accuracy is undefined for empty inputs")
    return np.sum(y_pred == y_true) / len(y_pred)

# Evaluating on the validation set

In [10]:
# Predict each validation sample individually, then score against y_val.
accuracy(list(map(linear_svm.predict, X_val)), y_val)

0.7272727272727273

A validation accuracy of about 73% suggests we can do better; the SVM's `C` parameter probably needs to be tuned.

# Tuning C

In [11]:
# Fit one classifier per candidate C on the training split and record its
# validation accuracy; scores[i] belongs to options_for_C[i].
scores = []
options_for_C = [0.07, 0.075, 0.1, 0.125, 0.15, 0.20, 0.25, 0.35, 0.5]
for C in options_for_C:
    # Pass len(iris['target_names']) rather than a literal 3, the same
    # expression the later RBF cell uses.
    linear_svm = OneVsRestClassifier(SVM, len(iris['target_names']),
                                     class_labels=iris['target_names'], C=C, tol=1e-5)
    linear_svm.fit(X_train, y_train)
    scores.append(accuracy([linear_svm.predict(x) for x in X_val], y_val))

In [12]:
# The data-driven choice would be the candidate with the highest validation
# score (commented out below); the value is pinned by hand instead.
# NOTE(review): presumably pinned so later cells do not depend on `scores`
# -- confirm that 0.125 is still the argmax before relying on it.
# optimal_C = options_for_C[np.argmax(scores)]
optimal_C = 0.125

In [17]:
# Final model: same one-vs-rest wrapper, now with C=optimal_C and an RBF
# kernel (gamma=0.1). NOTE: the variable keeps the name `linear_svm` even
# though the kernel passed here is 'rbf'.
linear_svm = OneVsRestClassifier(
    SVM,
    len(iris['target_names']),
    class_labels=iris['target_names'],
    C=optimal_C,
    kernel='rbf',
    gamma=0.1,
)
linear_svm.fit(X_train, y_train)

In [18]:
# Score the refitted model on the validation split, one sample at a time.
accuracy(list(map(linear_svm.predict, X_val)), y_val)

0.9090909090909091

In [19]:
# Display the C value that was passed to the final model.
optimal_C

0.125