In [5]:
from sklearn import datasets

# Load two of the example datasets that ship with scikit-learn.
iris = datasets.load_iris()
digits = datasets.load_digits()

# Print, in order: the digits data array, the digits target array, and the
# first entry of digits.images.
for preview in (digits.data, digits.target, digits.images[0]):
    print(preview)

[[  0.   0.   5. ...,   0.   0.   0.]
 [  0.   0.   0. ...,  10.   0.   0.]
 [  0.   0.   0. ...,  16.   9.   0.]
 ..., 
 [  0.   0.   1. ...,   6.   0.   0.]
 [  0.   0.   2. ...,  12.   0.   0.]
 [  0.   0.  10. ...,  12.   1.   0.]]
[0 1 2 ..., 8 9 8]
[[  0.   0.   5.  13.   9.   1.   0.   0.]
 [  0.   0.  13.  15.  10.  15.   5.   0.]
 [  0.   3.  15.   2.   0.  11.   8.   0.]
 [  0.   4.  12.   0.   0.   8.   8.   0.]
 [  0.   5.   8.   0.   0.   9.   8.   0.]
 [  0.   4.  11.   0.   1.  12.   7.   0.]
 [  0.   2.  14.   5.  10.  12.   0.   0.]
 [  0.   0.   6.  13.  10.   0.   0.   0.]]


In [12]:
from sklearn import svm

# gamma and C are fixed by hand in this example. Tools such as grid search
# and cross validation can be used to find good parameter values automatically.
classifier = svm.SVC(C=100.0, gamma=0.001)

# Train on every sample except the last one, which is held back.
classifier.fit(X=digits.data[:-1], y=digits.target[:-1])

# Predict the class of the held-back last sample; the [-1:] slice passes it
# as a one-element slice of the data array rather than a single row.
classifier.predict(X=digits.data[-1:])

array([8])

In [13]:
# Data and target arrays of the iris dataset.
X = iris.data
y = iris.target

# Fit an SVC with default parameters. fit() is the last expression of the
# cell, so the fitted estimator's repr is displayed as the output.
classifier_2 = svm.SVC()
classifier_2.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
# Model persistence

import pickle

# Round-trip the fitted estimator through an in-memory pickle byte string and
# check that the restored copy can still predict on the first row of X.
# NOTE: unpickling can execute arbitrary code -- only load pickles that you
# created yourself or otherwise trust.
s = pickle.dumps(classifier_2)
classifier_3 = pickle.loads(s)
classifier_3.predict(X[0:1])

array([0])

In [17]:
# Print the first entry of y (the target array the classifier was fitted on),
# for comparison with the prediction made for X[0:1] by the unpickled model.
print(y[0])

0


In [18]:
# Persist the model with joblib, a replacement for pickle
# (https://joblib.readthedocs.io/en/latest/persistence.html).
# scikit-learn deprecated its bundled copy (sklearn.externals.joblib) in 0.21
# and removed it in 0.23, so prefer the standalone package and fall back to
# the bundled one only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Write the fitted estimator to disk, then load it back as a new object.
# NOTE: like pickle, joblib.load must only be used on files you trust.
joblib.dump(classifier_2, 'filename.pkl')

classifier_4 = joblib.load('filename.pkl')

In [26]:
# Refitting and updating parameters
import numpy as np

# Seeded generator, so the random draws below are repeatable.
rng = np.random.RandomState(0)

# The three draws share one generator, so their order is kept fixed.
X = rng.rand(100, 10)
y = rng.binomial(n=1, p=0.5, size=100)
X_test = rng.rand(5, 10)

# Build the estimator with default parameters, then switch its kernel to
# linear through set_params() before fitting.
classifier_5 = svm.SVC()
classifier_5.set_params(kernel='linear')
classifier_5.fit(X, y)

classifier_5.predict(X_test)

array([1, 0, 1, 1, 0])

In [28]:
# Change the kernel of the existing estimator to rbf and refit it on the
# same data; the prediction is then made with the refitted model.
classifier_5.set_params(kernel='rbf')
classifier_5.fit(X, y)

classifier_5.predict(X_test)

array([0, 0, 0, 1, 0])

In [30]:
# Multiclass vs. multilabel fitting
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
y = [0, 0, 1, 1, 2]

# The classifier is fit on a 1d array of multiclass labels, so predict()
# returns the corresponding multiclass predictions.
classif = OneVsRestClassifier(estimator=svm.SVC(random_state=0))
classif.fit(X, y)
classif.predict(X)


array([0, 0, 1, 1, 2])

In [31]:
# Replace the 1d labels with a 2d array of binary label indicators and
# refit the same classifier on it.
y = LabelBinarizer().fit_transform(y)
classif.fit(X, y)

# The fourth and fifth rows of the prediction are all zeroes: those
# instances matched none of the three labels that were fit upon.
classif.predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [33]:
from sklearn.preprocessing import MultiLabelBinarizer

# Each instance can be assigned multiple labels. MultiLabelBinarizer
# binarizes this 2d list of multilabels into the array that is fit upon.
y = MultiLabelBinarizer().fit_transform(
    [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
)
classif.fit(X, y)

# Having been fit on instances with multiple labels each, predict() returns
# a 2d array with multiple predicted labels for each instance.
classif.predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])