# Loading an example dataset

In [2]:
from sklearn import datasets

In [3]:
# Fetch the two bundled example datasets used in the cells below.
digits = datasets.load_digits()
iris = datasets.load_iris()

In [4]:
# Print the digits.data array; the printed form is abbreviated with ellipses.
print(digits.data)

[[ 0.  0.  5. ...  0.  0.  0.]
 [ 0.  0.  0. ... 10.  0.  0.]
 [ 0.  0.  0. ... 16.  9.  0.]
 ...
 [ 0.  0.  1. ...  6.  0.  0.]
 [ 0.  0.  2. ... 12.  0.  0.]
 [ 0.  0. 10. ... 12.  1.  0.]]


In [5]:
# Show the first entry of digits.images (the recorded output is an 8x8 array).
digits.images[0]


array([[ 0.,  0.,  5., 13.,  9.,  1.,  0.,  0.],
       [ 0.,  0., 13., 15., 10., 15.,  5.,  0.],
       [ 0.,  3., 15.,  2.,  0., 11.,  8.,  0.],
       [ 0.,  4., 12.,  0.,  0.,  8.,  8.,  0.],
       [ 0.,  5.,  8.,  0.,  0.,  9.,  8.,  0.],
       [ 0.,  4., 11.,  0.,  1., 12.,  7.,  0.],
       [ 0.,  2., 14.,  5., 10., 12.,  0.,  0.],
       [ 0.,  0.,  6., 13., 10.,  0.,  0.,  0.]])

In [6]:
# Show the label array for the digits samples (used as y when fitting below).
digits.target

array([0, 1, 2, ..., 8, 9, 8])

# Learning and predicting: `clf` is a classifier that is fit on the digits data and then used to predict

In [8]:
from sklearn import svm

In [9]:
# Create the (not yet fitted) support vector classifier with explicit C and gamma.
clf = svm.SVC(C=100.0, gamma=0.001)

In [10]:
# Train on every digits sample except the last one ([:-1] drops the final
# row and its label), leaving that sample available for prediction.
clf.fit(digits.data[:-1], digits.target[:-1])

SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [34]:
# Predict the label of the last digits sample, the one left out of the fit above.
# NOTE(review): the recorded ValueError (64 features given, 4 expected) suggests
# this cell was executed out of order (In [34]) after `clf` had been refit on the
# 4-feature iris data further down. Restart the kernel and run top to bottom to
# refresh the stale output.
clf.predict(digits.data[-1:])

ValueError: X.shape[1] = 64 should be equal to 4, the number of features at training time

# Model persistence: saving and restoring a fitted model with pickle and joblib

In [16]:
from sklearn import datasets, svm

# Load iris and split it into the feature array and the label array.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Fit a fresh SVC on the iris data; the fitted estimator is the cell's output.
clf = svm.SVC(gamma='scale')
clf.fit(X, y)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [17]:
import pickle

# Serialize the fitted classifier to bytes, then rebuild a copy from them.
# (Only unpickle data you trust: pickle.loads can execute arbitrary code.)
s = pickle.dumps(clf)
clf2 = pickle.loads(s)

# Use the restored copy to predict the first sample.
clf2.predict(X[:1])

array([0])

In [20]:
# Label of the first iris sample, for comparison with the prediction above.
y[0]

0

In [21]:
from joblib import dump, load

In [22]:
# Write the fitted classifier to 'filename.joblib' (relative path, so it lands
# in the current working directory).
dump(clf, 'filename.joblib')

['filename.joblib']

In [23]:
# Read the classifier back from disk, rebinding `clf` to the loaded object.
clf = load('filename.joblib')

In [24]:
# Display the reloaded estimator; its repr lists the same parameters as the
# model that was saved.
clf

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

# Type casting

In [26]:
import numpy as np
from sklearn import random_projection

In [27]:
# Seeded generator so the random matrix is the same on every run.
rng = np.random.RandomState(0)

# Draw a 10 x 2000 matrix of random samples and cast it to float32.
X = rng.rand(10, 2000).astype('float32')
X.dtype

dtype('float32')

In [28]:
# Project X with a Gaussian random projection. The projection is drawn at
# random, so random_state is pinned to make X_new reproducible across re-runs.
transformer = random_projection.GaussianRandomProjection(random_state=0)
X_new = transformer.fit_transform(X)

# Inspect the dtype of the transformed data (the recorded output shows float64
# even though X itself is float32).
X_new.dtype

dtype('float64')

In [29]:
from sklearn import datasets

In [30]:
from sklearn.svm import SVC
# Reload iris and rebind `clf` to a new SVC trained on iris.target
# (the predictions a few cells below come back as integers).
iris = datasets.load_iris()
clf = SVC(gamma='scale')
# The fit call is the last expression, so the fitted estimator is displayed.
clf.fit(iris.data,iris.target)


SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [31]:
# Predict the first three iris samples; the model was fit on iris.target, and
# the recorded predictions are integers.
list(clf.predict(iris.data[:3]))

[0, 0, 0]

In [32]:
# Print the three feature rows that were just classified.
print(iris.data[:3])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]]


In [35]:
# Refit with string labels: indexing target_names by the integer targets maps
# each sample to its class name.
clf.fit(iris.data, iris.target_names[iris.target])

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [36]:
# After the refit, predictions come back as the string labels used for training.
list(clf.predict(iris.data[:3]))

['setosa', 'setosa', 'setosa']

In [38]:
# Print every target name except the last one ([:-1] drops the final entry).
print(iris.target_names[:-1])


['setosa' 'versicolor']
