## Loading an example dataset

In [1]:
from sklearn import datasets

In [2]:
iris = datasets.load_iris()
digits = datasets.load_digits()

In [4]:
print(digits.data)
print(digits.data.shape)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]
(1797, 64)


In [5]:
print(digits.target)
print(digits.target.shape)

[0 1 2 ... 8 9 8]
(1797,)


In [9]:
digits.images[0]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

In [8]:
print(digits.images[0].shape)

(8, 8)


## Learing and Predicting

In [10]:
from sklearn import svm
clf = svm.SVC(gamma=0.001, C=100.)

In [11]:
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [12]:
clf.predict(digits.data[-1:])

array([8])

## Model persistence

In [13]:
from sklearn import svm
from sklearn import datasets
clf = svm.SVC()
iris = datasets.load_iris()
X, y = iris.data, iris.target
clf = clf.fit(X, y)

In [14]:
import pickle
s = pickle.dumps(clf)
clf2 = pickle.loads(s)
clf2.predict(X[0: 1])

array([0])

In [15]:
y[0]

0

In [16]:
from sklearn.externals import joblib
joblib.dump(clf, "iris.pkl")

['iris.pkl']

In [17]:
clf_new = joblib.load("iris.pkl")

In [20]:
clf_new.predict(X[1: 2])

array([0])

In [21]:
y[1]

0

In [23]:
iris = datasets.load_iris()
clf = svm.SVC()
clf.fit(iris.data, iris.target)  
list(clf.predict(iris.data[:3]))

[0, 0, 0]

In [24]:
clf.fit(iris.data, iris.target_names[iris.target])  
list(clf.predict(iris.data[:3]))  

['setosa', 'setosa', 'setosa']

### Refitting and updating parameters

In [25]:
import numpy as np
from sklearn.svm import SVC
rng = np.random.RandomState(0)
X = rng.rand(100, 10)
y = rng.binomial(1, 0.5, 100)
X_test = rng.rand(5, 10)

In [28]:
clf = SVC()
clf.set_params(kernel="linear").fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [29]:
clf.predict(X_test)

array([1, 0, 1, 1, 0])

In [30]:
clf.set_params(kernel="rbf").fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [31]:
clf.predict(X_test)

array([0, 0, 0, 1, 0])

### Multiclass vs. multilabel fitting

In [49]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsOneClassifier
from sklearn.preprocessing import LabelBinarizer

X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]
y = [0, 0, 1, 1, 2]

In [50]:
classif = OneVsOneClassifier(estimator=SVC(random_state=0))
classif.fit(X, y).predict(X)

array([0, 0, 1, 1, 1])

In [53]:
classif.get_params

<bound method BaseEstimator.get_params of OneVsOneClassifier(estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=0, shrinking=True,
  tol=0.001, verbose=False),
          n_jobs=1)>

In [52]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer
# X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]] 
# y = [0, 0, 1, 1, 2]
y = LabelBinarizer().fit_transform(y)

# clf = OneVsRestClassifier(estimator=SVC(random_state=0))
print(classif.fit(X, y).predict(X))

ValueError: bad input shape (5, 3)

In [48]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
X = [[1, 2], [2, 4], [4, 5], [3, 2], [3, 1]]
y = [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
 
clf = OneVsRestClassifier(estimator=SVC(random_state=0))
y = MultiLabelBinarizer().fit_transform(y)
print(clf.fit(X, y).predict(X))

[[1 1 0 0 0]
 [1 0 1 0 0]
 [0 1 0 1 0]
 [1 0 1 0 0]
 [1 0 1 0 0]]
