### Loading an example dataset

In [10]:
from sklearn import datasets
# Load the two bundled datasets (digits and iris) used by the examples below.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [11]:
# Print the digits feature matrix; NumPy elides the middle rows and columns
# with "..." in the text output.
print(digits.data)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]


In [12]:
# Label array for the digits samples; it is passed to fit() as the target in
# the "Learning and predicting" section.
digits.target

array([0, 1, 2, ..., 8, 9, 8])

#### Shape of the data arrays

In [13]:
# First entry of digits.images: a single 8x8 array (see the output below).
# NOTE(review): presumably digits.data holds the same pixels flattened to one
# row per image -- confirm with digits.data.shape / digits.images.shape.
digits.images[0]

array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

### Learning and predicting

In [14]:
from sklearn import svm
# Support-vector classifier with hand-picked hyperparameters.
# NOTE(review): with gamma=0.02 the recorded prediction for the held-out last
# sample is 3, while the last entry of digits.target shown above is 8. The
# scikit-learn tutorial uses gamma=0.001 -- consider re-tuning gamma and
# re-running the cells below.
clf = svm.SVC(C=100.0, gamma=0.02)

#### Choosing the parameters of the model

In [15]:
# Train on every sample except the last one, which is held out for the
# predict() call in the next cell.
clf.fit(digits.data[:-1], digits.target[:-1])

In [16]:
# Predict the single held-out sample; the [-1:] slice (rather than [-1])
# keeps the input two-dimensional.
clf.predict(digits.data[-1:])

array([3])

### Conventions

#### Type Casting

In [17]:
import numpy as np
from sklearn import kernel_approximation

In [18]:
# Draw a reproducible 10 x 2000 matrix from a seeded generator and store it in
# single precision, so the next cell can show what a transformer does to
# float32 input.
rng = np.random.RandomState(0)
X = rng.rand(10, 2000).astype('float32')
X.dtype

dtype('float32')

In [19]:
# Seed the sampler so the randomly drawn feature map (and therefore X_new) is
# identical on every Restart & Run All; the dtype displayed below is
# unaffected by the seed.
transformer = kernel_approximation.RBFSampler(random_state=0)
X_new = transformer.fit_transform(X)
X_new.dtype

dtype('float32')

Regression targets are cast to `float64`, while classification targets keep their original type (compare the integer and string predictions below).

In [20]:
from sklearn import datasets
from sklearn.svm import SVC
# Default-parameter SVC trained on iris with the integer class labels.
clf = SVC()
iris = datasets.load_iris()
clf.fit(iris.data, iris.target)

In [21]:
# tolist() converts the predictions to plain Python ints, so the cell renders
# as [0, 0, 0] on every NumPy version. list() would keep NumPy scalar objects,
# whose repr changed in NumPy 2.
clf.predict(iris.data[:3]).tolist()

[0, 0, 0]

In [22]:
# Refit the same estimator, this time using the class-name strings (looked up
# through iris.target_names) as targets instead of the integer codes.
clf.fit(iris.data, iris.target_names[iris.target])

In [23]:
# tolist() converts the predictions to plain Python strings, so the cell
# renders as ['setosa', ...] on every NumPy version. list() would keep NumPy
# scalar objects, whose repr changed in NumPy 2.
clf.predict(iris.data[:3]).tolist()

['setosa', 'setosa', 'setosa']

#### Refitting and updating parameters
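Hyperparameters of an estimator can be updated after it has been constructed by calling `set_params()`; calling `fit()` again then retrains the estimator with the new settings.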

In [24]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC
# Load iris directly as a (features, labels) pair.
# NOTE: X is rebound here and no longer refers to the float32 random matrix
# from the type-casting example.
X, y = load_iris(return_X_y=True)

In [26]:
# Build the estimator with defaults, switch it to a linear kernel after
# construction, then train it.
clf = SVC()
clf.set_params(kernel="linear")
clf.fit(X, y)

In [27]:
# Predictions of the linear-kernel model for the first five samples.
clf.predict(X[:5])

array([0, 0, 0, 0, 0])

In [28]:
# Switch the existing estimator to the RBF kernel and train it again.
clf.set_params(kernel="rbf")
clf.fit(X, y)

In [29]:
# Predictions for the same five samples after refitting with the RBF kernel.
clf.predict(X[:5])

array([0, 0, 0, 0, 0])

#### Multiclass vs multilabel fitting

In [30]:
from sklearn.svm import SVC
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

In [31]:
# Toy problem: five 2-D points, each assigned one of three classes (0, 1, 2).
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
y = [0, 0, 1, 1, 2]

In [32]:
# One-vs-rest wrapper around a seeded SVC, fitted on the 1-D integer labels;
# the predictions come back as a 1-D array of class labels.
classif = OneVsRestClassifier(estimator=SVC(random_state=0))
classif.fit(X, y)
classif.predict(X)

array([0, 0, 1, 1, 2])

In [33]:
# Keep the integer labels in `y` and store the one-hot matrix under its own
# name, so re-running this cell always binarizes the original labels rather
# than an already binarized array.
y_onehot = LabelBinarizer().fit_transform(y)
# With a 2-D indicator target the predictions are 2-D as well; an all-zero
# row means no class was assigned to that sample.
classif.fit(X, y_onehot).predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [34]:
from sklearn.preprocessing import MultiLabelBinarizer
# Multilabel case: every sample carries a set of labels, which is expanded
# into a binary indicator matrix (one column per distinct label) before
# fitting.
label_sets = [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
y = MultiLabelBinarizer().fit_transform(label_sets)
classif.fit(X, y)
classif.predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])