In [2]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris["data"][:, (2, 3)] # petal length, petal width
y = (iris["target"] == 2).astype(np.float64) # Iris-virginica

svm_clf = Pipeline((
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge"))
))
svm_clf.fit(X, y)

Pipeline(steps=(('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_svc', LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='hinge', max_iter=1000, multi_class='ovr',
     penalty='l2', random_state=None, tol=0.0001, verbose=0))))

In [3]:
svm_clf.predict([[5.5, 1.7]])

array([ 1.])

In [4]:
from sklearn.datasets import make_moons
moons = make_moons()
moons

(array([[  5.37461710e-01,  -3.86599306e-01],
        [  1.99794539e+00,   4.35929780e-01],
        [  1.90096887e+00,   6.61162609e-02],
        [  1.96729486e+00,   2.46345416e-01],
        [  7.15472413e-01,  -4.58667853e-01],
        [ -9.49055747e-01,   3.15108218e-01],
        [  3.76510198e-01,  -2.81831482e-01],
        [  5.95216657e-01,  -4.14412623e-01],
        [  1.62348980e+00,  -2.81831482e-01],
        [  8.71318704e-01,   4.90717552e-01],
        [  3.20515776e-02,   9.99486216e-01],
        [  8.38088105e-01,   5.45534901e-01],
        [  2.38554042e-01,  -1.48228395e-01],
        [  1.09602303e+00,  -4.95379113e-01],
        [ -9.00968868e-01,   4.33883739e-01],
        [  9.26916757e-01,   3.75267005e-01],
        [  1.40478334e+00,  -4.14412623e-01],
        [  7.18349350e-01,   6.95682551e-01],
        [ -9.97945393e-01,   6.40702200e-02],
        [  9.67948422e-01,  -4.99486216e-01],
        [  8.40400105e-01,  -4.87181783e-01],
        [  1.28681296e-01,   9.282

In [44]:
import random
f = lambda x: 3 * x + 5

def generate_datum(fn):
    x = random.randint(-10, 10)
    y = random.randint(-10, 10)
    label = y > fn(x)
    return x, y, label
    
def generate_data(fn, size):
    return [ generate_datum(fn) for x in range(size)]

data = generate_data(f, 10000)
training_size = 8000
training_set = data[:training_size]
test_set = data[training_size:]

def split_dataset(dataset):
    x, y, label = [x for x in zip(*data)]
    X = list(zip(x, y))
    return X, label

X_train, y_train = split_dataset(training_set)
print(X[:10])
X_test, y_test = split_dataset(test_set)

[(-5, 1), (-3, -4), (9, -3), (-7, -1), (5, -3), (6, 4), (-9, -6), (9, 6), (-10, -7), (9, -8)]


In [67]:
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

linear_svc = LinearSVC(C=10.5, loss="hinge")
svm_classifier = Pipeline((
    ("scaler", StandardScaler()),
    ("linear_svc", linear_svc)
))

svm_classifier.fit(X, y)

Pipeline(steps=(('scaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('linear_svc', LinearSVC(C=10.5, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0))))

In [68]:
from sklearn.model_selection import cross_val_score

cross_val_score(svm_classifier, X_train, y_train, cv=3, scoring="accuracy")

array([ 1.,  1.,  1.])

In [69]:
w = linear_svc.coef_[0]
b = linear_svc.intercept_[0]
print(w, "x + ", b)

[-12.51434965   4.03662676] x +  -3.982054616


In [70]:
# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

def plot_svc_decision_boundary(svm_clf, xmin, xmax):
    w = svm_clf.coef_[0]
    b = svm_clf.intercept_[0]

    # At the decision boundary, w0*x0 + w1*x1 + b = 0
    # => x1 = -w0/w1 * x0 - b/w1
    x0 = np.linspace(xmin, xmax, 200)
    decision_boundary = -w[0]/w[1] * x0 - b/w[1]

    margin = 1/w[1]
    gutter_up = decision_boundary + margin
    gutter_down = decision_boundary - margin

    svs = svm_clf.support_vectors_
    plt.scatter(svs[:, 0], svs[:, 1], s=180, facecolors='#FFAAAA')
    plt.plot(x0, decision_boundary, "k-", linewidth=2)
    plt.plot(x0, gutter_up, "k--", linewidth=2)
    plt.plot(x0, gutter_down, "k--", linewidth=2)

In [71]:
y_pred = linear_svc.predict(X_test)
n_correct = sum(y_pred == y_test)
print(n_correct / len(y_pred))

0.929
