# Support Vector Machine

- Iris dataset
- 2 features
- Multiclass
- Linear kernel


In [None]:
import os, sys, pathlib

# Make the notebook folder, its parent folder, and the shared utilities
# folder importable (presumably where the `utils` module lives - confirm).
UTILS_FOLDER = 'S00 - Utils'
curPath = os.getcwd()
parPath = pathlib.Path(curPath).parent
utilPath = str(parPath / UTILS_FOLDER)
for p in [curPath, str(parPath), utilPath]:
    # Only add paths that are missing, so re-running this cell does not keep
    # growing sys.path with duplicate entries.
    if p not in sys.path:
        sys.path.append(p)

In [None]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from utils import plot_ds
from sklearn.svm import SVC

In [None]:
# Read data: load the Iris dataset through sklearn's `datasets` module
iris = datasets.load_iris()

In [None]:
# Features: keep only the last two columns of the data matrix
X = iris.data[:, -2:]
# Targets: the class label of every sample
y = iris.target

In [None]:
# Hold out 30% of the samples for testing. The split is stratified on y and
# uses a fixed seed so that it is reproducible.
splitOptions = {"test_size": 0.3, "random_state": 1, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **splitOptions)

In [None]:
# Standardization: fit the scaler on the training split only, then apply the
# same fitted transformation to both the training and the testing split
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

In [None]:
# Parameters
# Candidate settings: always a linear kernel, with increasing values of C
paramSet = {
    f"ex{idx}": {"kernel": "linear", "C": cValue}
    for idx, cValue in enumerate((0.01, 1, 100), start=1)
}

# Name of the setting used for this run
param = "ex1"
params = paramSet[param]

In [None]:
# Create model: fixed seed and verbose training output, combined with the
# kernel / C values selected in `params`
svm = SVC(
    random_state=1,
    verbose=1,
    **params,
)

In [None]:
# Training: fit the classifier on the standardized training samples
svm.fit(X=X_train_std, y=y_train)

In [None]:
# Prediction: predicted class labels for the standardized test samples
y_pred = svm.predict(X=X_test_std)

In [None]:
# Number of test samples whose predicted label differs from the true label
sumMiss = np.sum(y_test != y_pred)

In [None]:
# Fraction of test samples that were classified correctly
accuracyScore = accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report the test-set results
print(f"Misclassified examples: {sumMiss}")
print(f"Accuracy score: {accuracyScore}")
# For a multiclass linear SVC, coef_ holds one weight vector per one-vs-one
# class pair, so this value is the Frobenius norm over all of them together
print(f"Norm of W: {np.linalg.norm(svm.coef_)}")
# Norm of each pairwise classifier's own weight vector (one value per row)
print(f"Norm of W per class pair: {np.linalg.norm(svm.coef_, axis=1)}")

An SVM's decision function depends only on a subset of the training data, called the support vectors.


In [None]:
# Print support vectors
# print(svm.support_vectors_)

# Get indices of support vectors
# print(svm.support_)

# Get number of support vectors for each class
# print(svm.n_support_)

In [None]:
# Plot decision regions
# plot_ds is imported from the project-local `utils` module. Presumably it
# draws the fitted classifier's decision regions together with the
# standardized train/test samples - confirm against utils.plot_ds.
plot_ds(X_train_std, X_test_std, y_train, y_test, svm)