## Support Vector Machines

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
iris = datasets.load_iris()
X = iris['data'][:, (2, 3)] # petal lenght, petal width
y = (iris['target'] == 2).astype(np.float64) # Iris-Virginica

svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss='hinge')),
])

svm_clf.fit(X, y)

In [3]:
svm_clf.predict([[5.5, 1.7]])

array([1.])

### Nonlinear SVC Classification

In [4]:
from sklearn.datasets import make_moons
X, y = make_moons()

In [5]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)),
    ("sclaer", StandardScaler()),
    ("svm-clf", LinearSVC(C=10, loss="hinge")),
])

polynomial_svm_clf.fit(X, y)



In [6]:
from sklearn.svm import SVC

poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0 = 1, C=5)),
])

poly_kernel_svm_clf.fit(X, y)

In [7]:
# Gaussian RBF Kernel
rbf_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel='rbf', gamma=5, C=0.001)),
])

rbf_kernel_svm_clf.fit(X, y)

### SVM Regression

In [8]:
from sklearn.svm import LinearSVR

svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X, y)

In [9]:
from sklearn.svm import SVR

svm_poly_reg = SVR(kernel='poly', degree=2, C=100, epsilon=0.1)
svm_poly_reg.fit(X, y)

### Exercises

#### Question 1

Try to find the hyperplane(s) that best separates the classes on the dataset. 

#### Question 2

A support vector is a point of category in the dataset that determines the margin of the category

#### Question 3

With unscaled data, the Support Vector Machines tends to create a axial (horizontal or vertical) hyperplane that divides the dataset, difficulting the generalization.

#### Question 4

The Support Vector Machine separates the dataset in a way that split it exclusively in the existent categories, with no doubt.

#### Question 5

The dual problem performs better when there is a smaller number of intances than the number of features, which also is good for kernel trick. But this isn't the case for larger number of instances, hence it's better to use the Primal Problem.

#### Question 6

You can try to increase the $\gamma$ hyperparameter or increase the $C$ hyperparameter.

#### Question 7

Just add the $C$ factor in the H matrix

#### Question 9

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import load_digits
from sklearn.multiclass import OneVsRestClassifier

In [11]:
parameters = {
    'kernel': ('linear', 'poly', 'rbf', 'sigmoid'),
    'C': [0.1, 1.0, 10., 100.],
}

In [12]:
digits = load_digits()

In [13]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.25)

In [14]:
O_V_R = OneVsRestClassifier(SVC(C=10))
O_V_R.fit(X_train, y_train)

In [15]:
O_V_R.score(X_test, y_test)

0.9977777777777778

### Score: $99.8\%$

#### Question 10

In [16]:
from sklearn.datasets import fetch_california_housing

In [17]:
housingDS = fetch_california_housing()

In [18]:
# test, validation and training sets
X_train, X_test, y_train, y_test = train_test_split(housingDS.data, housingDS.target, test_size=0.25)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

In [22]:
svr_clf = SVR(kernel='poly', C = 100)
svr_clf.fit(X_train, y_train)

### Score: $56,7\% \approx 57\%$