In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

## Pipeline with GridSearchCV

In [3]:
# Load the breast cancer dataset shipped with scikit-learn. The returned
# object's `.data` and `.target` attributes are read in the next cell.
cancer_data = load_breast_cancer()

In [4]:
# Pull the feature matrix and labels out of the loaded dataset, then hold out
# a test set. `random_state=1` pins the shuffle so the split is repeatable;
# no `test_size` is given, so train_test_split's default proportions apply.
X, y = cancer_data.data, cancer_data.target
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [5]:
# Ordered (name, estimator) pairs that make up the pipeline.
# The names matter: they are the prefixes used in the parameter-grid keys
# further down (e.g. 'svm__C', 'pca__n_components').
steps2 = [
    ('scaler', StandardScaler()),  # 1. feature scaling
    ('pca', PCA()),                # 2. PCA; n_components is supplied by the grid
    ('svm', SVC()),                # 3. final classifier; C and gamma come from the grid
]

In [6]:
# Chain the steps into a single estimator so that the grid search below
# tunes and evaluates all three stages together.
pipe2 = Pipeline(steps=steps2)

In [7]:
# Search space for the pipeline. Each key is '<step name>__<parameter>',
# where the step name is one of those declared in `steps2`.
# 10 C values x 8 gamma values x 1 n_components value = 80 combinations.
params = {
    'svm__C': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10, 50, 100, 1000],
    'svm__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10, 100],
    'pca__n_components': [2],
}

In [8]:
# Exhaustively try every combination in `params` on the whole pipeline,
# using only the training split. No `cv` or `scoring` argument is passed,
# so GridSearchCV's default cross-validation and scoring are used.
cv = GridSearchCV(estimator=pipe2, param_grid=params)
cv.fit(X_train, y_train)

GridSearchCV(estimator=Pipeline(steps=[('scaler', StandardScaler()),
                                       ('pca', PCA()), ('svm', SVC())]),
             param_grid={'pca__n_components': [2],
                         'svm__C': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10, 50, 100,
                                    1000],
                         'svm__gamma': [0.0001, 0.001, 0.01, 0.1, 1, 5, 10,
                                        100]})

In [9]:
# Evaluate the fitted grid search on the held-out test split.
# NOTE(review): no `scoring` or `refit` was passed to GridSearchCV, so this
# presumably reports the default score (mean accuracy for SVC) of the
# best pipeline refit on the training data — confirm against the sklearn docs.
cv.score(X_test, y_test)

0.951048951048951

In [10]:
# Parameter combination selected by the grid search.
# NOTE(review): the recorded result picks svm__C=1000, the largest value in
# the searched C range — consider extending the grid upward to check that
# the optimum is not beyond the current boundary.
cv.best_params_

{'pca__n_components': 2, 'svm__C': 1000, 'svm__gamma': 0.001}