In [7]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

In [2]:
# In this assignment, I was not able to graph the fit because we are using 4 features instead of 2..

iris = datasets.load_iris()
X = iris.data # take all 4 features
y = iris.target # take all 3 species of flower
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.4)

# Linear Kernel

In [3]:
lin_svc = SVC(kernel='linear', gamma = 'auto').fit(X_train, y_train)
y_pred = lin_svc.predict(X_test)
print(classification_report(y_test, y_pred))
data = lin_svc.score(X_test, y_test)
print("Mean accuracy: ", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       1.00      1.00      1.00        20
           2       1.00      1.00      1.00        16

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60

Mean accuracy:  1.0


# Radial Basis Function Kernel

In [4]:
rbf_svc = SVC(kernel='rbf', gamma=0.6).fit(X_train, y_train)
y_pred = rbf_svc.predict(X_test)
print(classification_report(y_test, y_pred))
data = rbf_svc.score(X_test, y_test)
print("Mean accuracy: ", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       1.00      0.95      0.97        20
           2       0.94      1.00      0.97        16

    accuracy                           0.98        60
   macro avg       0.98      0.98      0.98        60
weighted avg       0.98      0.98      0.98        60

Mean accuracy:  0.9833333333333333


# Polynomial Kernel

In [5]:
poly_svc = SVC(kernel='poly', gamma='auto', degree=5).fit(X_train, y_train)
y_pred = poly_svc.predict(X_test)
print(classification_report(y_test, y_pred))
data = poly_svc.score(X_test, y_test)
print("Mean accuracy: ", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       0.87      1.00      0.93        20
           2       1.00      0.81      0.90        16

    accuracy                           0.95        60
   macro avg       0.96      0.94      0.94        60
weighted avg       0.96      0.95      0.95        60

Mean accuracy:  0.95


# Precomputed Kernel

In [6]:
pre_train = np.dot(X_train, X_train.T)
pre_test = np.dot(X_test, X_train.T)
pre_svc = SVC(kernel='precomputed', gamma='auto', random_state=0).fit(pre_train, y_train)
y_pred = pre_svc.predict(pre_test)
print(classification_report(y_test, y_pred))
data = pre_svc.score(pre_test, y_test)
print("Mean accuracy: ", data)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       1.00      1.00      1.00        20
           2       1.00      1.00      1.00        16

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60

Mean accuracy:  1.0


# Dicussion

In thi assignment, I fitted the Iris dataset using all 3 species and all 4 features using different kernels provided by Sklearn's SVM. The kernels I used were the "Linear Kernel",  "Radial Basis Function Kernel", "Polynomial Kernel" and the "Precomputed Kernel" to see the accuracy difference between each of them.

I notice that the accuracy would change with each kernel when running it over again. I also notice that some kernels are better at others for this particular Iris problem. I notice that the worst one out of the 4 is the polynomial kernel, which would most of the time have a lower accuracy than the other 3 with an accuracy of 0.95. The 2nd worst one is RBF kernel at an accuracy of 0.9833. And lastly, precomputed kernel and the linear kernel are tied at an accuracy of 1.

The purpose of using different kernels is to see which one can do better against the randomness of our dataset. Even though the polynomial kernel is bad for the Iris dataset, it may be the better option in some other problems. Therefore, we use differnet kernels to see which one is the best to use for the current dataset.
