# sklearn Pipeline and .pkl

## Pipeline

https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html

In [1]:
from sklearn import svm
from sklearn.datasets import samples_generator
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
from sklearn.pipeline import Pipeline
# generate some data to play with
X, y = samples_generator.make_classification(
    n_informative=5, n_redundant=0, random_state=42)
# ANOVA SVM-C
anova_filter = SelectKBest(f_regression, k=5)
clf = svm.SVC(kernel='linear')
anova_svm = Pipeline([('anova', anova_filter), ('svc', clf)])
# You can set the parameters using the names issued
# For instance, fit using a k of 10 in the SelectKBest
# and a parameter 'C' of the svm
anova_svm.set_params(anova__k=10, svc__C=.1).fit(X, y)
                     



prediction = anova_svm.predict(X)
anova_svm.score(X, y)                        

# getting the selected features chosen by anova_filter
anova_svm.named_steps['anova'].get_support()




# Another way to get selected features chosen by anova_filter
anova_svm.named_steps.anova.get_support()


array([False, False,  True,  True, False, False,  True,  True, False,
        True, False,  True,  True, False,  True, False,  True,  True,
       False, False])

## .pkl
https://scikit-learn.org/stable/modules/model_persistence.html

In [2]:
# 将 anova_svm: Pipeline 用pickle导出
import pickle
s = pickle.dumps(anova_svm)
print(s)
with open('anova_svm.pkl', 'w') as f:
    pickle.dump(anova_svm, f)

b'\x80\x03csklearn.pipeline\nPipeline\nq\x00)\x81q\x01}q\x02(X\x05\x00\x00\x00stepsq\x03]q\x04(X\x05\x00\x00\x00anovaq\x05csklearn.feature_selection.univariate_selection\nSelectKBest\nq\x06)\x81q\x07}q\x08(X\n\x00\x00\x00score_funcq\tcsklearn.feature_selection.univariate_selection\nf_regression\nq\nX\x01\x00\x00\x00kq\x0bK\nX\x07\x00\x00\x00scores_q\x0ccnumpy.core.multiarray\n_reconstruct\nq\rcnumpy\nndarray\nq\x0eK\x00\x85q\x0fC\x01bq\x10\x87q\x11Rq\x12(K\x01K\x14\x85q\x13cnumpy\ndtype\nq\x14X\x02\x00\x00\x00f8q\x15K\x00K\x01\x87q\x16Rq\x17(K\x03X\x01\x00\x00\x00<q\x18NNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK\x00tq\x19b\x89C\xa0/\xed\xa7\xfa\xf7\xbc\xbf?\x93!\x10\xb7CG\xd0?Bu~\xd53\x8c\x11@H\xaa,j\x16\x05!@5\x91aX\xc3\x8e\xd3?\xf4Z\x81\xe0\xfd\x8b\xd3?1\x90A\xd2\x11,\xe7?8\x8b\x8b~\xeaE6@\x85\x9a!\xf3\x8d\xf3\xb9?\x92A\xac\xadd}5@\xd4\xd5\xf3\n\xb8\xcfv?\xc3\xb2P\xe1R\xd1%@/.Qy\xde9\xe7?\x861\xc6\xe1\xae\x14\xdf?X{\x82c\xb8\x07\x00@\x83\xcfYu2{\xdf?\xed`9\xca5\xa1\xe8?W\xbd\xf3\xdb\xb3X\

TypeError: write() argument must be str, not bytes

In [3]:
# 导入和验证
anova_svm_new = pickle.loads(s)
anova_svm.named_steps.anova.get_support()

array([False, False,  True,  True, False, False,  True,  True, False,
        True, False,  True,  True, False,  True, False,  True,  True,
       False, False])

In [4]:
with open('anova_svm.pkl', 'w') as f:
    pickle.dump(anova_svm, f)

TypeError: write() argument must be str, not bytes

In [5]:
from sklearn.externals import joblib
joblib.dump(anova_svm, 'anova_svm.pkl')

['anova_svm.pkl']

In [6]:
joblib.dump(anova_svm, 'anova_svm.loblib')

['anova_svm.loblib']

In [None]:
# test