In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [4]:
# Load the iris dataset; keep only the two petal features and build a binary target.
iris_data = load_iris()
X = iris_data.data[:, [2, 3]]  # columns 2 and 3: petal length, petal width
y = np.where(iris_data.target == 2, 1.0, 0.0)  # 1.0 for Iris-Virginica (class 2), 0.0 otherwise

In [5]:
# Build the classification pipeline.
# SVMs are sensitive to feature scale, so StandardScaler standardizes the inputs first.
# LinearSVC also regularizes the bias term, so the training set should be centered by
# subtracting its mean; StandardScaler does this automatically.
svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    # loss="hinge" is not the default value, so it has to be set explicitly.
    # dual=True is spelled out because the L2-penalized hinge loss can only be solved in
    # the dual formulation (dual=False is rejected for this loss), and because the default
    # value of `dual` has changed in newer scikit-learn releases.
    ('linear_svc', LinearSVC(C=1, loss="hinge", dual=True)),
])
# Note: dual=False is generally faster unless there are more features than training
# instances, but it is only available with the default squared hinge loss.
# Note: the dual solver shuffles the data randomly; pass random_state=<int> to LinearSVC
# if exactly repeatable fits are required.

In [7]:
# Fit the whole pipeline (scaler, then linear SVM) on the petal features and labels.
# As the last expression in the cell, the value returned by fit() is displayed below.
svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linear_svc',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [9]:
# Classify a single new sample given as [petal length, petal width].
new_sample = [[5.5, 1.7]]
svm_clf.predict(new_sample)

array([1.])

In [None]:
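# Alternatives to LinearSVC(C=1, loss="hinge"):
# - SVC(kernel="linear", C=1): a good choice as long as the training dataset is not large (it gets much slower as the dataset grows).
# - SGDClassifier(loss="hinge", alpha=1/(m*C)), where m is the number of training instances: a workable option, but generally not as good as svm.LinearSVC.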