# Chapter 5: Support Vector Machines

In [1]:
import numpy as np 
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# A Support Vector Machine (SVM) is a very powerful and versatile Machine Learning
# model, capable of performing linear or nonlinear classification, regression, and even
# outlier detection. It is one of the most popular models in Machine Learning, and anyone
# interested in Machine Learning should have it in their toolbox. SVMs are particularly
# well suited for classification of complex but small- or medium-sized datasets.

## linear SVM classification

In [3]:
# See pages 155-156 for more detail.

## soft margin classification

In [7]:
# Even a single outlier can make a huge difference to an SVM,
# and the model won't generalize as well when outliers are present.
# To avoid these issues it is preferable to use a more flexible model. The objective is to
# find a good balance between keeping the street as large as possible and limiting the
# margin violations (i.e., instances that end up in the middle of the street or even on the
# wrong side). This is called soft margin classification.

# you can control this balance using the C hyperparameter:
# a smaller C value leads to a wider street but more margin violations.

# See the diagram on page 157.
# If your SVM model is overfitting, you can try regularizing it by
# reducing C.

In [8]:
# The following Scikit-Learn code loads the iris dataset, scales the features, and then
# trains a linear SVM model (using the LinearSVC class with C = 1 and the hinge loss
# function, described shortly) to detect Iris-Virginica flowers.

from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

# Load the iris dataset and keep only the two petal measurements as features.
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length, petal width
# Binary target: 1.0 where the label is class 2 (Iris-Virginica), 0.0 otherwise.
y = (iris["target"] == 2).astype(np.float64)

# Scale the features first, then fit the linear SVM on the scaled values.
# random_state seeds the solver's pseudo-random number generator so that a
# Restart & Run All of this notebook reproduces the same fitted model.
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
])

# Leave fit() as the last expression so the cell displays the fitted pipeline.
svm_clf.fit(X, y)



Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [10]:
# Classify a single new flower with the fitted pipeline.
# Note: unlike Logistic Regression classifiers, SVM classifiers do not
# output probabilities for each class.
svm_clf.predict(
    [[5.5, 1.7]],  # one sample: petal length, petal width
)

array([1.])