**Pipeline: standardization followed by Linear Discriminant Analysis**
  

In [44]:
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler

In [42]:
# Load the breast-cancer dataset as a (features, labels) pair of arrays.
X, y = load_breast_cancer(return_X_y=True)

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [45]:
# Pipeline steps run in order: standardize the features first, then fit the
# Linear Discriminant Analysis classifier on the scaled values.
estimators = [
    ("standardize", StandardScaler()),
    ("model", LinearDiscriminantAnalysis()),
]

model = Pipeline(estimators)

# fit() returns the fitted pipeline, so leaving it as the last expression
# also displays the estimator's configuration as the cell output.
model.fit(X_train, y_train)

Pipeline(memory=None,
         steps=[('standardize',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('model',
                 LinearDiscriminantAnalysis(n_components=None, priors=None,
                                            shrinkage=None, solver='svd',
                                            store_covariance=False,
                                            tol=0.0001))],
         verbose=False)

In [47]:
# Score the fitted pipeline on the held-out test split; the test features go
# through the pipeline's fitted preprocessing steps before the final
# estimator's score (mean accuracy for a classifier) is computed.
model.score(X=X_test, y=y_test)

0.9736842105263158

**Pipeline variant with multiple preprocessing steps (FeatureUnion of PCA and SelectKBest)**

In [53]:
from sklearn.datasets import load_breast_cancer
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold, cross_val_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

In [50]:
# Reload the breast-cancer data so this section can run on its own.
X, y = load_breast_cancer(return_X_y=True)

# Same 80/20 split and seed as the earlier section of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)

In [54]:
# Two feature extractors applied side by side to the same input:
#   - "pca": the first 3 principal components
#   - "stc": the 6 highest-scoring original features (SelectKBest with its
#            default scoring function)
features = [
    ("pca", PCA(n_components=3)),
    ("stc", SelectKBest(k=6)),
]

# FeatureUnion concatenates the outputs of both transformers column-wise.
feature_union = FeatureUnion(features)

In [55]:
# Full pipeline: combined feature extraction -> standardization -> classifier.
# NOTE(review): scaling happens after the feature union, so PCA and
# SelectKBest operate on the raw, unscaled features - confirm this ordering
# is intended.
estimators = [
    ("feature", feature_union),
    ("standardize", StandardScaler()),
    ("model", LogisticRegression()),
]

model = Pipeline(estimators)

In [56]:
# Evaluate the pipeline with 10-fold cross-validation on the full dataset.
#
# `random_state` is deliberately not passed: KFold only uses it when
# shuffle=True, and recent scikit-learn releases raise a ValueError when it is
# set while shuffle keeps its default of False (older releases only warned).
# Without shuffling the folds are consecutive and deterministic, i.e. exactly
# the splits the previous call produced, so the results are unchanged.
# To evaluate on shuffled folds instead, pass shuffle=True together with a seed.
kfold = KFold(n_splits=10)

# n_jobs=-1 runs the per-fold fits in parallel on all available CPU cores.
results = cross_val_score(model, X, y, cv=kfold, n_jobs=-1)



In [57]:
# Average the per-fold scores returned by cross_val_score into a single
# summary figure for the pipeline.
results.mean()

0.9578634085213033