# make_pipeline

## Imports

In [43]:
from sklearn.pipeline import Pipeline, make_pipeline

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, train_test_split

from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

from sklearn.datasets import load_breast_cancer

## Two variants of writing pipelines

In [44]:
# Explicit form: every step is given as a hand-picked (name, estimator) pair.
pipe_long = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('svm', SVC()),
])

# Shorthand form: make_pipeline derives the step names from the estimators.
pipe_short = make_pipeline(MinMaxScaler(), SVC())

### Steps

`make_pipeline` names each step automatically, using the lowercased class name of the estimator:

- 'minmaxscaler'
- 'svc'

In [45]:
# Show the (name, estimator) pairs that make_pipeline generated.
pipe_short.steps

[('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
 ('svc', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False))]

## Another pipe

When the same estimator class appears more than once, `make_pipeline` appends a numeric suffix so the step names stay unique:

- 'standardscaler-1'
- 'standardscaler-2'

In [46]:
# The same estimator class is used twice, on either side of a 2-component PCA.
pipe = make_pipeline(
    StandardScaler(),
    PCA(n_components=2),
    StandardScaler(),
)
pipe.steps

[('standardscaler-1',
  StandardScaler(copy=True, with_mean=True, with_std=True)),
 ('pca',
  PCA(copy=True, iterated_power='auto', n_components=2, random_state=None,
    svd_solver='auto', tol=0.0, whiten=False)),
 ('standardscaler-2',
  StandardScaler(copy=True, with_mean=True, with_std=True))]

## Fitting the pipe

In [47]:
# Load the breast-cancer dataset and fit the scaler -> PCA -> scaler pipeline
# on its features only (no targets are passed to fit).
cancer = load_breast_cancer()
pipe.fit(cancer.data)

# Fetch the fitted PCA step by its auto-generated name, then read its
# components_ array (one row per principal component).
pca_step = pipe.named_steps['pca']
components = pca_step.components_

print(components.shape)

(2, 30)


## Another pipe x2

### With GridSearchCV

In [49]:
# Scale the features, then classify with logistic regression.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)
pipe.steps

[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)),
 ('logisticregression',
  LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
            intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
            penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
            verbose=0, warm_start=False))]

In [50]:
# Hold out a test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=42)

# Step parameters are addressed as '<step name>__<parameter>', using the
# name make_pipeline generated for the LogisticRegression step.
params = {'logisticregression__C': [0.001, 0.01, 0.1, 1, 10, 100]}

# 5-fold cross-validated search over C, fitted on the training split only.
grid = GridSearchCV(pipe, param_grid=params, cv=5)
grid.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('logisticregression', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))]),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'logisticregression__C': [0.001, 0.01, 0.1, 1, 10, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

## Call best params

In [38]:
# Parameter setting selected by the grid search.
grid.best_params_

{'logisticregression__C': 1}

## Call best estimator

In [39]:
# The pipeline stored as the search's best estimator.
grid.best_estimator_

Pipeline(memory=None,
     steps=[('standardscaler', StandardScaler(copy=True, with_mean=True, with_std=True)), ('logisticregression', LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False))])

## Call best estimator info

In [40]:
# Look up the LogisticRegression step of the best pipeline by its generated name.
grid.best_estimator_.named_steps['logisticregression']

LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

## Call best estimator param

In [41]:
# Coefficients of the LogisticRegression step in the best pipeline.
grid.best_estimator_.named_steps['logisticregression'].coef_

array([[-0.37434848, -0.40676063, -0.32755015, -0.46588663, -0.19720295,
         0.63315918, -0.76169221, -1.11324369,  0.23261109,  0.05484959,
        -1.29988632,  0.22628648, -0.60431532, -0.93804174, -0.21279424,
         0.64531012, -0.12111446, -0.35783842,  0.49960687,  0.71323896,
        -0.8456996 , -1.2887434 , -0.52271849, -0.87002828, -0.53590878,
         0.13233006, -1.00135079, -0.7588972 , -1.20340803, -0.15949521]])

![gif](dank.gif)