# APIs

In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LogisticRegression

# Pipeline 1

In [4]:
# Load the dataset once and carve out a held-out test split.
# random_state is fixed so the split is the same on every run.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=0,
)

In [5]:
# Two-step pipeline: min-max scaling followed by an SVM classifier.
# The step names ('scaler', 'svm') are what later parameter grids refer to.
pipe = Pipeline(
    steps=[
        ('scaler', MinMaxScaler()),
        ('svm', SVC()),
    ]
)

# Fit every step on the training split, then evaluate on the held-out split.
pipe.fit(X_train, y_train)
test_score = pipe.score(X_test, y_test)
print(f'Test score: {test_score:.2f}')

Test score: 0.97


# Pipeline in Grid Searches

In [7]:
# Candidate values for both SVM hyper-parameters; keys follow the
# '<step name>__<parameter>' convention so they reach the 'svm' step.
svm_candidates = [0.001, 0.01, 0.1, 1, 10, 100]
param_grid = {
    'svm__C': list(svm_candidates),
    'svm__gamma': list(svm_candidates),
}

# Cross-validate the whole pipeline (scaler + SVM) with 5 folds.
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

best_cv_accuracy = grid.best_score_
print(f'Best CV accuracy: {best_cv_accuracy:.2f}')
test_set_score = grid.score(X_test, y_test)
print(f'Test set score: {test_set_score:.2f}')
best_parameters = grid.best_params_
print(f'Best parameters: {best_parameters}')

Best CV accuracy: 0.98
Test set score: 0.97
Best parameters: {'svm__C': 1, 'svm__gamma': 1}


# make_pipeline

In [9]:
# make_pipeline builds the same kind of Pipeline without explicit step names.
pipe_short = make_pipeline(
    MinMaxScaler(),
    SVC(C=100),
)

In [10]:
# List the (name, estimator) pairs that make_pipeline produced.
short_pipeline_steps = pipe_short.steps
print(f'Pipeline steps:\n{short_pipeline_steps}')

Pipeline steps:
[('minmaxscaler', MinMaxScaler()), ('svc', SVC(C=100))]


# Step Attributes

In [12]:
# Fit the short pipeline, then look up the 'svc' step by name to read one of
# its fitted attributes.
pipe_short.fit(X_train, y_train)
fitted_svc = pipe_short.named_steps['svc']
dual_coef = fitted_svc.dual_coef_
print(dual_coef.shape)

(1, 56)


# Attributes in Grid Search

In [15]:
# NOTE: this rebinds `pipe`; from here on it refers to a
# StandardScaler + LogisticRegression pipeline rather than the SVM pipeline.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)

In [16]:
# Grid over the C parameter of the 'logisticregression' step.
param_grid = {
    'logisticregression__C': [0.01, 0.1, 1, 10, 100],
}

# NOTE: the data is split again with a different seed (4), which replaces the
# X_train / X_test / y_train / y_test variables created earlier.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    random_state=4,
)

# 5-fold cross-validated search over the whole pipeline.
grid = GridSearchCV(estimator=pipe, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)

In [17]:
# best_estimator_ is the full pipeline found by the search, not just the classifier.
best_pipeline = grid.best_estimator_
print(f'Best estimator:\n{best_pipeline}')

Best estimator:
Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression', LogisticRegression(C=1))])


In [18]:
# Look up the 'logisticregression' step of the best pipeline and print its
# coefficients.
# The lookup is done outside the f-string on purpose: reusing single quotes
# inside a single-quoted f-string is a SyntaxError on Python versions before
# 3.12, so the original one-liner only ran on 3.12+.
best_logreg = grid.best_estimator_.named_steps['logisticregression']
print(f'Logistic Regression coefficients:\n{best_logreg.coef_}')

Logistic Regression coefficients:
[[-0.4475566  -0.34609376 -0.41703843 -0.52889408 -0.15784407  0.60271339
  -0.71771325 -0.78367478  0.04847448  0.27478533 -1.29504052  0.05314385
  -0.69103766 -0.91925087 -0.14791795  0.46138699 -0.1264859  -0.10289486
   0.42812714  0.71492797 -1.08532414 -1.09273614 -0.85133685 -1.04104568
  -0.72839683  0.07656216 -0.83641023 -0.64928603 -0.6491432  -0.42968125]]


# Grid Searching Preprocessing Steps