In [1]:
from sklearn import svm

from sklearn.datasets import make_classification
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline

In [2]:
X, y = make_classification(n_informative=5, n_redundant=0, random_state=42)
X.shape

(100, 20)

In [5]:
anova_filter = SelectKBest(f_regression, k=5)
clf = svm.SVC(kernel='linear')

# Creata a pipeline containig 2 steps
# 1. anova filter
# 2. svm classifier
pipe = Pipeline([('anova', anova_filter), ('svc', clf)])

# Set the parameters using the names issued.
# Parameters of pipelines can be set using ‘__’ separated parameter names.
# For instance, fit using a k of 10 in the SelectKBest
# and a parameter 'C' of the svm
pipe.set_params(anova__k=10, svc__C=.1)

Pipeline(memory=None,
         steps=[('anova',
                 SelectKBest(k=10,
                             score_func=<function f_regression at 0x1a16b5a7a0>)),
                ('svc',
                 SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='linear', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [6]:
pipe.fit(X, y)

Pipeline(memory=None,
         steps=[('anova',
                 SelectKBest(k=10,
                             score_func=<function f_regression at 0x1a16b5a7a0>)),
                ('svc',
                 SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='linear', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [9]:
prediction = pipe.predict(X)
prediction

array([1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0])

In [10]:
pipe.score(X, y)

0.83

In [11]:
# getting the selected features chosen by anova_filter
pipe['anova'].get_support()

array([False, False,  True,  True, False, False,  True,  True, False,
        True, False,  True,  True, False,  True, False,  True,  True,
       False, False])

In [13]:
# Every step in the pipeline can be accessed via index
sub_pipeline = pipe[:1]
sub_pipeline

Pipeline(memory=None,
         steps=[('anova',
                 SelectKBest(k=10,
                             score_func=<function f_regression at 0x1a16b5a7a0>))],
         verbose=False)

In [15]:
pipe['svc']

SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [22]:
coeficient = pipe['svc'].coef_
coeficient

array([[-0.27800702, -0.28647747,  0.18037974, -0.61968395,  0.51723386,
         0.30916225,  0.37356076, -0.07301102,  0.1184873 , -0.04285414]])

In [24]:
# The shape of the coeficient should match the number of selected features
coeficient.shape

(1, 10)