Unless otherwise specified, input will be cast to float64:

In [3]:
import numpy as np
from sklearn import random_projection


In [4]:
# Draw uniform samples from the half-open interval [0, 1).
# NOTE(review): the global NumPy RNG is not seeded in the visible cells, so
# the values displayed below will differ on every run.
np.random.rand(3,2)  # arguments give the shape: (rows, cols)

array([[ 0.75469302,  0.9820999 ],
       [ 0.85752796,  0.38343956],
       [ 0.03350375,  0.09306276]])

In [5]:
# Seeded generator so the sampled matrix is reproducible.
rng = np.random.RandomState(0)
# Sample a 10 x 2000 matrix and downcast it to float32 in a single step.
X = rng.rand(10, 2000).astype('float32')
X.dtype

dtype('float32')

In [6]:
# Fit a random projection on the float32 matrix X and transform it.
transformer = random_projection.GaussianRandomProjection()
X_new = transformer.fit_transform(X)   # result comes back as float64 (see the dtype displayed below)
X_new.dtype

dtype('float64')

Regression targets are cast to float64; classification targets are kept in the type they were given:

In [9]:
from sklearn import datasets
from sklearn.svm import SVC
# Fit a support-vector classifier on the integer class labels of iris.
iris = datasets.load_iris()
clf = SVC()
clf.fit(iris.data, iris.target)
# .tolist() converts the predictions to built-in Python ints, so the displayed
# list stays [0, 0, 0] even on NumPy versions whose scalar repr is np.int64(0).
clf.predict(iris.data[:3]).tolist()

[0, 0, 0]

In [12]:
# Refit on string labels: indexing target_names with the integer targets
# yields one class-name string per sample.
clf.fit(iris.data, iris.target_names[iris.target])
# .tolist() converts the predictions to built-in Python str objects, so the
# displayed list stays ['setosa', ...] rather than [np.str_('setosa'), ...]
# on NumPy versions that show the scalar type in the repr.
clf.predict(iris.data[:3]).tolist()

['setosa', 'setosa', 'setosa']

Refitting and updating parameters

In [13]:
# Fresh random problem: 100 training rows with 10 features, 0/1 labels, and
# 5 extra rows to predict on. The draws are kept in this order so the rng
# stream, and therefore the data, is unchanged.
X = rng.rand(100, 10)
y = rng.binomial(1, 0.5, 100)
X_test = rng.rand(5, 10)

# Construct with default parameters, then switch the kernel after construction.
clf = SVC()
clf.set_params(kernel="linear")
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [14]:
# Predict labels for X_test with the classifier as currently fitted.
clf.predict(X_test)

array([0, 0, 1, 1, 0])

Calling fit() more than once overwrites whatever was learned by any previous call to fit(). Here the kernel is changed to rbf and the estimator is refit on the same data:

In [15]:
# Change the kernel on the existing estimator, then refit it on the same data.
clf.set_params(kernel="rbf")
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [16]:
# Predict labels for X_test again, now using the refitted classifier.
clf.predict(X_test)

array([1, 1, 1, 1, 0])

Multiclass vs. multilabel fitting

In [17]:
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelBinarizer

In [18]:
# Five samples with two features each.
X = [
    [1, 2],
    [2, 4],
    [4, 5],
    [3, 2],
    [3, 1],
]
# One class label (0, 1 or 2) per sample.
y = [0, 0, 1, 1, 2]

In [27]:
# One-vs-rest wrapper around an SVC created with random_state=0.
classif = OneVsRestClassifier(estimator=SVC(random_state=0))
# Fit on the 1-D list of class labels, then predict the same samples.
classif.fit(X, y)
classif.predict(X)

array([0, 0, 1, 1, 2])

It is also possible to fit on a 2D array of binary label indicators:

In [21]:
# Binarize the class labels in y; the printed result has one row per sample
# and one 0/1 indicator column per class.
y2 = LabelBinarizer().fit_transform(y)
print(y2)

[[1 0 0]
 [1 0 0]
 [0 1 0]
 [0 1 0]
 [0 0 1]]


In [22]:
# Refit the same one-vs-rest classifier on the 2-D indicator matrix, then
# predict the training samples.
classif.fit(X, y2)
classif.predict(X)

array([[1, 0, 0],
       [1, 0, 0],
       [0, 1, 0],
       [0, 0, 0],
       [0, 0, 0]])

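Note that the fourth and fifth instances returned all zeroes, indicating that they matched none of the three labels the classifier was fit on. With multilabel targets, a single instance can likewise be assigned several labels at once: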

In [23]:
from sklearn.preprocessing import MultiLabelBinarizer
# Each inner list holds the labels attached to one sample; binarize them
# directly so y3 is bound once, to the resulting indicator matrix.
y3 = MultiLabelBinarizer().fit_transform(
    [[0, 1], [0, 2], [1, 3], [0, 2, 3], [2, 4]]
)
print(y3)

[[1 1 0 0 0]
 [1 0 1 0 0]
 [0 1 0 1 0]
 [1 0 1 1 0]
 [0 0 1 0 1]]


In [28]:
# Refit the one-vs-rest classifier on the multilabel indicator matrix, then
# predict the training samples.
classif.fit(X, y3)
classif.predict(X)

array([[1, 1, 0, 0, 0],
       [1, 0, 1, 0, 0],
       [0, 1, 0, 1, 0],
       [1, 0, 1, 0, 0],
       [1, 0, 1, 0, 0]])