# Support Vector Machines

In [39]:
import numpy as np
from sklearn.pipeline import Pipeline
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's default theme so every figure drawn later in the notebook shares one style.
sns.set()

## Iris Dataset
The following code loads the iris dataset and trains a linear SVM classifier to detect Iris virginica flowers.


In [9]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris

# Load the iris dataset and keep only feature columns 2 and 3
# (petal length and petal width in scikit-learn's iris data).
iris = load_iris()
X = iris.data[:, [2, 3]]
# Binary target: 1.0 where the class label is 2 (Iris virginica), 0.0 otherwise.
y = np.asarray(iris.target == 2, dtype=np.float64)

In [16]:
# Two-step pipeline: standardise the features, then fit a linear SVM that
# uses the hinge loss with regularisation strength C=1.
svm_clf = Pipeline(steps=[
    ('std_scale', StandardScaler()),
    ('linear_svc', LinearSVC(loss='hinge', C=1)),
])
# fit() returns the fitted pipeline, which the notebook displays as the cell output.
svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('std_scale',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linear_svc',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [19]:
# One sample with the same two features the model was trained on
# (columns 2 and 3 of the iris data). A prediction of 1.0 means "Iris virginica".
new_flower = [[5.5, 1.7]]
svm_clf.predict(new_flower)

array([1.])

## SVM Kinds
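There are two kinds of SVM classifiers:
1. Hard margin classifier: every training instance must lie outside the margin and on the correct side of the decision boundary.
2. Soft margin classifier: some margin violations are tolerated, and the hyperparameter `C` controls how strongly they are penalised.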

## Polynomial Features in SVMs

We can add polynomial features to make a training set that is not linearly separable become linearly separable. For example, the code below demonstrates this on the moons dataset.


In [37]:
from sklearn.datasets import make_moons
from sklearn.preprocessing import PolynomialFeatures
# Build the two-moons toy dataset. Fixing random_state makes the shuffled
# sample order identical on every Restart & Run All.
m = make_moons(random_state=42)
X, y = pd.DataFrame(m[0]), pd.DataFrame(m[1])
# Xy bundles the two feature columns (0 and 1) with the label column 'y' for plotting.
Xy = X.copy()
Xy['y'] = y
# Expand the inputs to degree-3 polynomial features, standardise them,
# then fit a linear SVM with hinge loss on the expanded feature set.
poly_svm_clf = Pipeline([
    ('poly_feature', PolynomialFeatures(degree=3)),
    ('std_scale', StandardScaler()),
    ('svm_clf', LinearSVC(C=10, loss='hinge')),
])
# y is a single-column DataFrame, but scikit-learn expects 1-D labels and
# otherwise emits a DataConversionWarning; flatten it only for the fit call
# so that y itself keeps its original type for later cells.
poly_svm_clf.fit(X, np.ravel(y))

  y = column_or_1d(y, warn=True)


Pipeline(memory=None,
         steps=[('poly_feature',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('std_scale',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 LinearSVC(C=10, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

# Scatter the two moon features (columns 0 and 1 of Xy), coloured by the class label 'y'.
sns.scatterplot(data=Xy, x=0, y=1, hue='y')

## Polynomial Kernel 
A low polynomial degree cannot deal with complex datasets, while a high degree creates a huge number of features, which makes the model very slow.

We can use the `poly` kernel in `SVC` instead of adding polynomial features. It is much faster because the kernel trick gives the same result as if all the polynomial features had been added, without actually computing them.


In [26]:
from sklearn.svm import SVC
# Standardise the features, then fit an SVC with a degree-3 polynomial kernel.
# The kernel stands in for explicit PolynomialFeatures; coef0 and C are passed
# through to SVC unchanged.
poly_kernel_svm_clf = Pipeline([
    ('std_scaler', StandardScaler()),
    ('svm_clf', SVC(kernel='poly', degree=3, coef0=1, C=5))
])
# The y defined earlier in the notebook is a single-column DataFrame; flatten it
# to the 1-D label vector scikit-learn expects to avoid a DataConversionWarning.
# np.ravel also accepts labels that are already 1-D.
poly_kernel_svm_clf.fit(X, np.ravel(y))

Pipeline(memory=None,
         steps=[('std_scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=5, cache_size=200, class_weight=None, coef0=1,
                     decision_function_shape='ovr', degree=3,
                     gamma='auto_deprecated', kernel='poly', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

## Adding Similarity Features 
We can map our data into a different (often higher-dimensional) feature space in which it is much easier to classify using linear methods.
