In [23]:
import numpy as np
from sklearn import datasets
from sklearn.datasets import make_moons

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import LinearSVC, SVC, SVR, LinearSVR

In [8]:
# IRIS DATASET

In [4]:
# Load Iris and reduce it to a two-feature, binary classification problem.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # columns 2 and 3: petal length, petal width
# 1.0 where the sample's target is class 2 (Iris virginica), 0.0 otherwise.
y = np.equal(iris.target, 2).astype(np.float64)

In [11]:
# Two-step pipeline: standardize the inputs, then a linear SVM classifier
# trained with hinge loss and regularization parameter C=1.
svm_clf = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("linear_svc", LinearSVC(loss="hinge", C=1)),
    ]
)

In [12]:
# Fit every pipeline step on X and y. As the cell's last expression, the
# fitted pipeline is also displayed.
svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [13]:
# Classify a single sample given as [petal length, petal width].
svm_clf.predict([[5.5, 1.7]])

array([1.])

In [14]:
# MOONS DATASET
# adding polynomial features

In [15]:
# Generate the two-moons toy dataset. A fixed random_state makes the noisy
# samples identical on every run, so downstream fits and predictions are
# reproducible under Restart Kernel -> Run All.
X, y = make_moons(n_samples=100, noise=0.15, random_state=42)

# Expand the inputs with degree-3 polynomial terms, standardize them, and fit
# a linear SVM (hinge loss, C=10) on the expanded features.
# NOTE(review): LinearSVC is left without a random_state here, so any
# shuffling inside its solver is still unseeded.
polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)),
    ("scaler", StandardScaler()),
    ("svm_clf", LinearSVC(C=10, loss="hinge"))
])

In [16]:
# Fit the polynomial-features pipeline on X and y; the fitted pipeline is
# displayed as the cell's output.
polynomial_svm_clf.fit(X, y)

Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('scaler', StandardScaler()),
                ('svm_clf', LinearSVC(C=10, loss='hinge'))])

In [17]:
# With an SVM, the kernel trick gives the same result as adding polynomial features without actually adding them. This avoids creating a large number of features and slowing down the model.

In [19]:
# Standardize the inputs, then use an SVC with a degree-3 polynomial kernel
# instead of an explicit PolynomialFeatures step.
polynomial_kernel_svm_clf = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm_clf", SVC(C=5, kernel="poly", degree=3, coef0=1)),
    ]
)

In [20]:
# Fit the scaler and the polynomial-kernel SVC on X and y; the fitted
# pipeline is displayed as the cell's output.
polynomial_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=5, coef0=1, kernel='poly'))])

In [21]:
# Non-linearity can also be tackled with the Gaussian RBF kernel, which is based on a similarity function that measures how much each instance resembles a given landmark.
# To use it, set kernel="rbf".

In [22]:
# Rule of thumb: try the linear kernel first, especially if the training set is large or has many features.
# Then try the RBF kernel if the dataset is not too large.
# Other kernels (e.g. polynomial) can be tried afterwards.

In [24]:
# LINEAR AND NON-LINEAR REGRESSION

In [25]:
# Linear support-vector regressor with epsilon=1.5, fitted on X and y.
# NOTE(review): when the notebook is run top to bottom, X and y at this point
# are still the make_moons samples and their class labels, so this fits a
# regressor on classification targets — confirm that is intended.
svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X, y)

LinearSVR(epsilon=1.5)

In [26]:
# Support-vector regressor using a degree-2 polynomial kernel.
svm_poly_reg = SVR(
    kernel="poly",
    degree=2,
    C=100,
    epsilon=0.1,
)

In [27]:
# Fit the polynomial-kernel regressor on X and y; the fitted estimator is
# displayed as the cell's output.
# NOTE(review): X and y appear to be the make_moons classification data from
# the earlier cell — confirm regression on these targets is intended.
svm_poly_reg.fit(X, y)

SVR(C=100, degree=2, kernel='poly')