In [1]:
# In this notebook, I will use the Scikit-Learn library to fit a Support Vector Machine Classifier to the Iris Flower Dataset 
# (using 3 binary classifiers and then 1 multiclass classifier)

In [2]:
# Import Statements
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import matplotlib

In [3]:
# Load the data
iris_data = datasets.load_iris()
iris_data.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [4]:
# Separate the predictors and the target variable
X = iris_data['data']
Y = iris_data['target']

In [5]:
# Create the train and test set for each classifier
shuffled_indices = np.random.permutation(len(X))
train_set_size = int(0.8*len(X))

X_train = X[shuffled_indices[:train_set_size]]
X_test = X[shuffled_indices[train_set_size:]]
Y_train = Y[shuffled_indices[:train_set_size]]
Y_test = Y[shuffled_indices[train_set_size:]]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# For the Iris Setosa detector
Y_train1 = (Y[shuffled_indices[:train_set_size]] == 0).astype(np.float64)
Y_test1 = (Y[shuffled_indices[train_set_size:]] == 0).astype(np.float64)
# For the Iris Versicolor detector
Y_train2 = (Y[shuffled_indices[:train_set_size]] == 1).astype(np.float64)
Y_test2 = (Y[shuffled_indices[train_set_size:]] == 1).astype(np.float64)
# For the Iris Virginica detector
Y_train3 = (Y[shuffled_indices[:train_set_size]] == 2).astype(np.float64)
Y_test3 = (Y[shuffled_indices[train_set_size:]] == 2).astype(np.float64)

print(Y_train1[:20], '\n', Y_train2[:20], '\n', Y_train3[:20])

[0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.] 
 [0. 0. 0. 1. 0. 0. 0. 1. 0. 1. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1.] 
 [1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0.]


### Linear Kernel

In [6]:
# Whether to run a grid search or not
run_grid_search = True

In [7]:
# If True, run a grid search, or create 3 LinearSVC models for detecting each class
from sklearn.model_selection import GridSearchCV

if run_grid_search:
    param_grid = [
        {'C': [0.01, 0.1, 1, 1.0, 10], 'loss': ['hinge'], 'max_iter': [5000]},
    ]

    svm_clf = LinearSVC()

    grid_search1 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)
    grid_search2 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)
    grid_search3 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)

    grid_search1.fit(X_train, Y_train1)
    grid_search2.fit(X_train, Y_train2)
    grid_search3.fit(X_train, Y_train3)

    svm_clf1 = grid_search1.best_estimator_
    svm_clf2 = grid_search2.best_estimator_
    svm_clf3 = grid_search3.best_estimator_

    print(grid_search1.best_params_)
    print(grid_search2.best_params_)
    print(grid_search3.best_params_)
    
else:
    # Create a model for each class (binary classifiers)
    svm_clf1 = LinearSVC(C=0.1,loss='hinge', max_iter = 5000)

    svm_clf2 = LinearSVC(C=10, loss='hinge', max_iter = 5000)

    svm_clf3 = LinearSVC(C=0.1, loss='hinge', max_iter = 5000)


{'C': 0.01, 'loss': 'hinge', 'max_iter': 5000}
{'C': 10, 'loss': 'hinge', 'max_iter': 5000}
{'C': 1, 'loss': 'hinge', 'max_iter': 5000}




In [8]:
# Fit the models to the data

# Iris Setosa vs All
svm_clf1.fit(X_train,Y_train1)
Y_test1_predictions = svm_clf1.predict(X_test)
# Iris Versicolor vs All
svm_clf2.fit(X_train,Y_train2)
Y_test2_predictions = svm_clf2.predict(X_test)
# Iris Virginica vs ALl
svm_clf3.fit(X_train,Y_train3)
Y_test3_predictions = svm_clf3.predict(X_test)


In [9]:
# Evaluate the Test Set performance for all 3 models
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score,  recall_score, f1_score

def PerformanceMetrics(Y, Y_predicted):
    Y = Y.flatten()
    Y_predicted = Y_predicted.flatten()
    
    accuracy = accuracy_score(Y, Y_predicted)
    cm = confusion_matrix(Y, Y_predicted)
    precision = precision_score(Y, Y_predicted)
    recall = recall_score(Y, Y_predicted)
    f1 = f1_score(Y, Y_predicted)
    return accuracy, cm, precision, recall, f1

def DisplayMetrics(category, metrics):
    print(category)
    print('\t','Accuracy: ', metrics[0])
    print('\t','Precision: ', metrics[2])
    print('\t','Recall: ', metrics[3])
    print('\t','F1-Score: ', metrics[4])
    print('\t','Confusion Matrix: ', '\n\t', metrics[1].tolist())

print('Individual Class Performance With a Linear Kernel'.upper(), '\n')
categories = ['Iris Setosa', 'Iris Versicolor', 'Iris Virginica']
DisplayMetrics(categories[0], tuple(PerformanceMetrics(Y_test1, Y_test1_predictions)))
DisplayMetrics(categories[1], tuple(PerformanceMetrics(Y_test2, Y_test2_predictions)))
DisplayMetrics(categories[2], tuple(PerformanceMetrics(Y_test3, Y_test3_predictions)))


INDIVIDUAL CLASS PERFORMANCE WITH A LINEAR KERNEL 

Iris Setosa
	 Accuracy:  1.0
	 Precision:  1.0
	 Recall:  1.0
	 F1-Score:  1.0
	 Confusion Matrix:  
	 [[21, 0], [0, 9]]
Iris Versicolor
	 Accuracy:  0.7
	 Precision:  0.5
	 Recall:  0.3333333333333333
	 F1-Score:  0.4
	 Confusion Matrix:  
	 [[18, 3], [6, 3]]
Iris Virginica
	 Accuracy:  0.9666666666666667
	 Precision:  0.9230769230769231
	 Recall:  1.0
	 F1-Score:  0.9600000000000001
	 Confusion Matrix:  
	 [[17, 1], [0, 12]]


In [10]:
# If True, run a grid search, or create 1 multi-class classifier model
if run_grid_search: 
    param_grid = [
        {'C': [0.01, 0.1, 1, 1.0, 10], 'loss': ['hinge'], 'max_iter': [10000]},
    ]

    gridSearch = GridSearchCV(LinearSVC(), param_grid, cv = 3, scoring = 'accuracy', return_train_score = True)
    gridSearch.fit(X_train,  Y_train)

    clf = gridSearch.best_estimator_
    print(gridSearch.best_params_)

else:
    # Build Linear Support Vector Classifier (multiclass classifier)
    clf = LinearSVC(C = 1, loss = 'hinge', max_iter = 10000) # parameters determined from the grid search above

{'C': 10, 'loss': 'hinge', 'max_iter': 10000}




In [11]:
# Fit the model on the train set
clf.fit(X_train, Y_train)

# Make predictions on test set
Y_test_predictions = clf.predict(X_test)

# Assess model accuracy
cm = confusion_matrix(Y_test, Y_test_predictions)
accuracy = accuracy_score(Y_test, Y_test_predictions)

print('Multiclass Classification Performance With A Linear Kernel'.upper(), '\n')
print('Accuracy: ', '\n', accuracy, '\n')
print('Confusion Matrix: ', '\n', cm)


MULTICLASS CLASSIFICATION PERFORMANCE WITH A LINEAR KERNEL 

Accuracy:  
 0.9333333333333333 

Confusion Matrix:  
 [[ 8  1  0]
 [ 0  8  1]
 [ 0  0 12]]


### Gaussian RBF Kernel

In [12]:
# Whether to run grid search for the Gaussian RBF kernel
run_grid_search_rbf = True

In [13]:
# If True, run a grid search, otherwise create 3 SVC models with a rbf kernel (one for each class)
if run_grid_search_rbf:
    param_grid = [
        {'C': [0.01, 0.1, 1, 1.0, 10], 'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 10], 'max_iter': [5000]},
    ]

    svm_clf = SVC()

    grid_search_rbf1 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)
    grid_search_rbf2 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)
    grid_search_rbf3 = GridSearchCV(svm_clf, param_grid, cv = 3, scoring = 'recall', return_train_score = True)

    grid_search_rbf1.fit(X_train, Y_train1)
    grid_search_rbf2.fit(X_train, Y_train2)
    grid_search_rbf3.fit(X_train, Y_train3)

    svm_clf1_rbf = grid_search_rbf1.best_estimator_
    svm_clf2_rbf = grid_search_rbf2.best_estimator_
    svm_clf3_rbf = grid_search_rbf3.best_estimator_

    print(grid_search_rbf1.best_params_)
    print(grid_search_rbf2.best_params_)
    print(grid_search_rbf3.best_params_)

else:
    # Create a model for each class (binary classifiers)
    svm_clf1_rbf = SVC(kernel = 'rbf', C = 10, gamma = 0.1, max_iter = 5000)

    svm_clf2_rbf = SVC(kernel = 'rbf', C = 1, gamma = 0.3, max_iter = 5000)

    svm_clf3_rbf =SVC(kernel = 'rbf', C = 1, gamma = 0.3, max_iter = 5000)




{'C': 0.1, 'gamma': 0.03, 'max_iter': 5000}
{'C': 1, 'gamma': 0.1, 'max_iter': 5000}
{'C': 10, 'gamma': 0.03, 'max_iter': 5000}




In [14]:
# Fit the 3 models on the training set

# Iris Setosa vs All
svm_clf1_rbf.fit(X_train,Y_train1)
Y_test1_predictions_rbf = svm_clf1_rbf.predict(X_test)
# Iris Versicolor vs All
svm_clf2_rbf.fit(X_train,Y_train2)
Y_test2_predictions_rbf = svm_clf2_rbf.predict(X_test)
# Iris Virginica vs ALl
svm_clf3_rbf.fit(X_train,Y_train3)
Y_test3_predictions_rbf = svm_clf3_rbf.predict(X_test)

In [15]:
# Evaluate the performance of the 3 models on the test set
print('Individual Class Performance With a Gaussian RBF Kernel'.upper(), '\n')
categories = ['Iris Setosa', 'Iris Versicolor', 'Iris Virginica']
DisplayMetrics(categories[0], tuple(PerformanceMetrics(Y_test1, Y_test1_predictions_rbf)))
DisplayMetrics(categories[1], tuple(PerformanceMetrics(Y_test2, Y_test2_predictions_rbf)))
DisplayMetrics(categories[2], tuple(PerformanceMetrics(Y_test3, Y_test3_predictions_rbf)))


INDIVIDUAL CLASS PERFORMANCE WITH A GAUSSIAN RBF KERNEL 

Iris Setosa
	 Accuracy:  1.0
	 Precision:  1.0
	 Recall:  1.0
	 F1-Score:  1.0
	 Confusion Matrix:  
	 [[21, 0], [0, 9]]
Iris Versicolor
	 Accuracy:  0.9333333333333333
	 Precision:  0.8181818181818182
	 Recall:  1.0
	 F1-Score:  0.9
	 Confusion Matrix:  
	 [[19, 2], [0, 9]]
Iris Virginica
	 Accuracy:  1.0
	 Precision:  1.0
	 Recall:  1.0
	 F1-Score:  1.0
	 Confusion Matrix:  
	 [[18, 0], [0, 12]]


In [16]:
# If True, run a grid search, otherwise create a multiclass SVC model
if run_grid_search_rbf:
    param_grid = [
        {'C': [0.01, 0.1, 1, 1.0, 10], 'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 10], 'max_iter': [5000]},
    ]

    gridSearch_rbf = GridSearchCV(SVC(), param_grid, cv = 3, scoring = 'accuracy', return_train_score = True)
    gridSearch_rbf.fit(X_train,  Y_train)

    clf_rbf = gridSearch_rbf.best_estimator_
    print(gridSearch_rbf.best_params_)

else:
    # Build Support Vector Classifier (multiclass classifier)
    clf_rbf = SVC(kernel = 'rbf', C = 1, gamma = 0.1, max_iter = 5000)


{'C': 10, 'gamma': 0.03, 'max_iter': 5000}




In [17]:
# Fit the model on the train set
clf_rbf.fit(X_train, Y_train)

# Make predictions on test set
Y_test_predictions_rbf = clf_rbf.predict(X_test)

# Assess model accuracy
cm = confusion_matrix(Y_test, Y_test_predictions_rbf)
accuracy = accuracy_score(Y_test, Y_test_predictions_rbf)

print('Multiclass Classification Performance With A Gaussian RBF Kernel'.upper(), '\n')
print('Accuracy: ', '\n', accuracy, '\n')
print('Confusion Matrix: ', '\n', cm)


MULTICLASS CLASSIFICATION PERFORMANCE WITH A GAUSSIAN RBF KERNEL 

Accuracy:  
 1.0 

Confusion Matrix:  
 [[ 9  0  0]
 [ 0  9  0]
 [ 0  0 12]]


### Inference

In [18]:
'''
The performance of the 3 OvR classifiers (one for each class) improved significantly by making a switch from the Linear Kernel
to the Gaussian RBF Kernel. 
On the other hand, the performance of the multiclass classifier using the Gaussian RBF Kernel was only slightly better than 
that of the classifier using the Linear Kernel
'''

'\nThe performance of the 3 OvR classifiers (one for each class) improved significantly by making a switch from the Linear Kernel\nto the Gaussian RBF Kernel. \nOn the other hand, the performance of the multiclass classifier using the Gaussian RBF Kernel was only slightly better than \nthat of the classifier using the Linear Kernel\n'