In [3]:
# Machine learning
#      Supervised Learning
#             Classification
#             Regression (continuous values)
#      Unsupervised Learning 
#             Clustering (to discover groups of similar examples within the data)
#             Density Estimation ( to determine the distribution of data within the input space)
#             Visualization (to project the high-dimensional space down to two or three dimensions)

In [8]:
import numpy as np
from sklearn import datasets,svm

In [6]:
# Fetch two of scikit-learn's bundled example datasets.
# Each returned object exposes:
#   .data   -> the samples, a 2-D array of shape (n_samples, n_features)
#   .target -> the ground-truth labels; for these two datasets a 1-D array
#              of shape (n_samples,), one class label per sample
iris_dataset, digits_dataset = datasets.load_iris(), datasets.load_digits()

In [13]:
# Build a support vector classifier with fixed hyper-parameters.
clf = svm.SVC(gamma=0.001, C=100)

# Hold out the very last digit as a one-sample "test set" and train on
# every other sample.
train_samples = digits_dataset.data[:-1]
train_labels = digits_dataset.target[:-1]
held_out_sample = digits_dataset.data[-1:]  # slice (not index) keeps it 2-D

# fit(X, y) learns the model from the training samples and their labels.
fit_ = clf.fit(train_samples, train_labels)
print("the SVC model is :\n", fit_)

# Classify the held-out sample with the model learned above.
predict_ = clf.predict(held_out_sample)
print("the predict result is :\n", predict_)

the SVC model is :
 SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
the predict result is :
 [8]


In [38]:
# Model persistence: round-trip the trained classifier `clf` through pickle
# (in memory) and through joblib (on disk), and check that each restored
# model still classifies the held-out digit.
import pickle

# joblib is a standalone package. The copy vendored under sklearn.externals
# was deprecated in scikit-learn 0.21 and removed in 0.23, so import the
# top-level package first and only fall back to the vendored copy on very
# old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

MODEL_PATH = 'digi_svm_classifier.pkl'

# --- pickle: serialize to a bytes object and restore it -------------------
s = pickle.dumps(clf)
# SECURITY: only unpickle data you produced yourself; loading an untrusted
# pickle can execute arbitrary code.
clf2 = pickle.loads(s)
predict_ = clf2.predict(digits_dataset.data[-1:])
# predict() returns a 1-element array; index it explicitly instead of relying
# on the implicit array-to-scalar conversion that NumPy >= 1.25 deprecates.
print("the predict result is [%d] with the svm classifier model loaded by pickle\n" % predict_[0])

# --- joblib: persist to disk ----------------------------------------------
# joblib is more efficient than plain pickle for objects that carry large
# numpy arrays, which is typical of fitted scikit-learn estimators.
joblib.dump(clf, MODEL_PATH)
clf3 = joblib.load(MODEL_PATH)
predict_ = clf3.predict(digits_dataset.data[-1:])
print("the predict result is [%d] with the svm classifier model loaded by joblib\n" % predict_[0])

# Note: joblib.dump returns a list of filenames. In older joblib releases (before 0.10) each individual
# numpy array contained in the 'clf' object is serialized as a separate file on the filesystem, and all of
# those files are required in the same folder when reloading the model with joblib.load. Newer releases
# write a single file by default.

# Security & maintainability limitations
# pickle (and joblib by extension) has some issues regarding maintainability and security. Because of this:
#    [1] Never unpickle untrusted data
#    [2] Models saved in one version of scikit-learn might not load in another version

the predict result is [8] with the svm classifier model loaded by pickle

the predict result is [8] with the svm classifier model loaded by joblib

