In [1]:
"""Example of a simple Pipeline that combines a transformer and an estimator.
Goal: load the iris dataset, create train and test splits, and compute prediction accuracy.
Ref: https://scikit-learn.org/stable/getting_started.html
"""


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Chain a feature-scaling transformer with a classifier into one pipeline.
# Steps run in the order listed: scale first, then classify.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(random_state=0),  # fixed seed for reproducibility
)

In [3]:
# Fetch the iris data directly as a (features, labels) pair.
X, y = load_iris(return_X_y=True)

# No split sizes are passed, so the defaults apply (train=0.75, test=0.25).
# random_state=0 pins the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [4]:
# Sanity-check the split: report the shape of each feature matrix.
for label, features in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
):
    print(label, features.shape)

X_train shape: (112, 4)
X_test shape: (38, 4)


In [5]:
# Train every pipeline step on the training split.
# fit() is left as the cell's last expression so the fitted pipeline is displayed.
pipe.fit(X=X_train, y=y_train)

Pipeline(memory=None,
         steps=[('standardscaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('logisticregression',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=100,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=0,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [6]:
# Use the fitted pipeline as an estimator: predict() runs the test features
# through the scaler and then the classifier.
y_pred = pipe.predict(X_test)

# accuracy_score is documented as accuracy_score(y_true, y_pred), so the
# ground-truth labels go first. Accuracy happens to be symmetric, so the value
# is unchanged, but the correct order matters for metrics that are not
# (e.g. precision or recall).
print(accuracy_score(y_test, y_pred))

0.9736842105263158
