In [3]:
from sklearn import datasets

# Load the digits and iris datasets; the two loader calls are independent of each other.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [12]:
from sklearn import svm

# Support-vector classifier with explicitly chosen gamma and C.
clf = svm.SVC(C=100., gamma=0.001)

# Train on every digit sample except the final one, which is kept out of fitting.
train_images, train_labels = digits.data[:-1], digits.target[:-1]
clf.fit(train_images, train_labels)

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [27]:
# Predict the classes of the last two digit samples. Note that only the final
# sample was excluded by the `[:-1]` training slice used in the fit call above;
# the second-to-last sample was part of the training data.
last_two_digits = digits.data[-2:]
clf.predict(last_two_digits)

array([9, 8])

## 持久化模型

In [28]:
from sklearn import datasets, svm

# Iris features and labels, plus a fresh default-configured classifier to fit on them.
iris = datasets.load_iris()
X = iris.data
y = iris.target
clf = svm.SVC()

In [33]:
# Fit the classifier on the iris features and labels; the fitted estimator is the cell's displayed value.
clf.fit(X=X, y=y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [34]:
import pickle

In [40]:
# Round-trip the fitted classifier through an in-memory pickle byte string,
# then check that the restored copy can still predict.
s = pickle.dumps(clf)
clf2 = pickle.loads(s)
first_three = X[:3]
clf2.predict(first_three)

array([0, 0, 0])

## 用 joblib 代替 pickle 可能更合适(对包含大型 numpy 数组的对象更高效)

In [41]:
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in 0.23,
# so prefer the standalone `joblib` package. The vendored copy is kept only as a
# fallback for old scikit-learn installs where the standalone package is absent.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [42]:
# Persist the fitted classifier to disk; the call's return value lists the written file name(s).
joblib.dump(value=clf, filename='svc_iris.kpl')

['svc_iris.kpl']

In [43]:
# Restore the classifier from the file written by the dump cell.
modelNew = joblib.load(filename='svc_iris.kpl')

In [47]:
# Predict with the reloaded model on four rows: the slice starts five rows from
# the end and stops before the final row.
tail_rows = X[-5:-1]
modelNew.predict(tail_rows)

array([2, 2, 2, 2])

## sklearn 的一些约定:类型转换

In [51]:
import numpy as np
from sklearn import random_projection

In [62]:
# Seeded generator so the sample is reproducible.
rng = np.random.RandomState(0)
# Draw a 10 x 2000 array and keep a float32 copy of it.
X = rng.rand(10, 2000).astype('float32')
X.dtype

dtype('float32')

In [64]:
# Fit a Gaussian random projection on X, project X with it, then inspect the
# dtype of the projected array.
transformer = random_projection.GaussianRandomProjection()
transformer.fit(X)
X_new = transformer.transform(X)
X_new.dtype

dtype('float64')

## 回归目标会被转换为 float64,而分类目标保持不变;以及再次训练并更新标签

In [67]:
from sklearn import datasets
from sklearn.svm import SVC

iris = datasets.load_iris()
# Fit on the numeric targets, leaving the last two samples out of the training slice.
features, numeric_labels = iris.data[:-2], iris.target[:-2]
clf = SVC()
clf.fit(features, numeric_labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [70]:
# Show the predictions for the first three samples as a plain Python list.
[label for label in clf.predict(iris.data[:3])]

[0, 0, 0]

In [74]:
# Refit the same estimator on all samples, this time with each numeric target
# replaced by the corresponding entry of `target_names`.
named_labels = iris.target_names[iris.target]
clf.fit(iris.data, named_labels)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [75]:
# Predict for the first four samples using the estimator refitted on named labels.
first_four = iris.data[:4]
clf.predict(first_four)

array(['setosa', 'setosa', 'setosa', 'setosa'],
      dtype='<U10')

## 再次训练和更新参数

In [76]:
import numpy as np
from sklearn.svm import SVC

In [89]:
# Seeded generator; the three draws below share it, so their order determines the values.
rng = np.random.RandomState(0)
X = rng.random_sample((100, 10))
y = rng.binomial(n=1, p=0.5, size=100)
X_test = rng.random_sample((5, 10))
X_test

array([[ 0.29594983,  0.92929167,  0.26590563,  0.82814661,  0.98510868,
         0.78339665,  0.51898992,  0.06607426,  0.47241379,  0.43825595],
       [ 0.20279604,  0.42358764,  0.35775788,  0.16368426,  0.44137414,
         0.26279996,  0.52206242,  0.03516006,  0.90623142,  0.81636431],
       [ 0.55258133,  0.85180858,  0.96239507,  0.11052229,  0.63083181,
         0.997994  ,  0.98788917,  0.60332299,  0.12802087,  0.58319283],
       [ 0.00206464,  0.19891133,  0.95612316,  0.33044057,  0.63839011,
         0.28085949,  0.94782189,  0.72855873,  0.32965116,  0.79176142],
       [ 0.10816552,  0.39231894,  0.22121813,  0.68372645,  0.10244628,
         0.39702583,  0.27664973,  0.50634292,  0.34989768,  0.70641058]])

In [87]:
# Start from a default SVC, switch its kernel through set_params, then fit.
clf = SVC()
clf.set_params(kernel='linear')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [88]:
# Predictions of the currently fitted estimator on the test rows.
clf.predict(X=X_test)

array([1, 0, 1, 1, 0])

In [90]:
# Change the kernel on the existing estimator and refit it on the same data.
clf.set_params(kernel='rbf')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [91]:
# Predictions on the same test rows after the refit.
clf.predict(X=X_test)

array([0, 0, 0, 1, 0])

## 多分类多标签拟合

In [92]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

In [93]:
# Five two-feature samples and their 1-D class labels (three distinct classes).
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
y = [0, 0, 1, 1, 2]

In [94]:
# One-vs-rest wrapper around a seeded SVC base estimator, fitted on the 1-D labels.
base_svc = SVC(random_state=0)
classif = OneVsRestClassifier(estimator=base_svc)
classif.fit(X, y).predict(X)

array([0, 0, 1, 1, 2])

In [96]:
# Fit against a binarized (indicator-matrix) encoding of the same labels.
# The encoding gets its own name instead of rebinding `y`: overwriting `y` here
# would make the earlier fitting cell train on the indicator matrix if it were
# re-run afterwards, silently changing its result.
y_binarized = LabelBinarizer().fit_transform(y)
classif.fit(X, y_binarized)
classif.predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [97]:
from sklearn.preprocessing import MultiLabelBinarizer

# Each sample now carries several labels; encode them as a binary indicator
# matrix and fit/predict with the same one-vs-rest wrapper.
y = MultiLabelBinarizer().fit_transform(
    [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
)
classif.fit(X, y)
classif.predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])