# Example of a pipeline

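This notebook uses the [Iris Dataset](https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html) to compare a standalone classifier with a scikit-learn `Pipeline` that standardizes the features before classifying.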

In [1]:
from sklearn.datasets import load_iris
import pandas as pd

In [2]:
# Load the Iris dataset bundled with scikit-learn. The cells below read its
# `.data`, `.feature_names`, `.target` and `.target_names` attributes.
data = load_iris()

In [3]:
# Put the feature matrix in a DataFrame whose columns carry the dataset's
# feature names, then preview the first rows.
X = pd.DataFrame(
    data=data.data,
    columns=data.feature_names,
)
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
# Wrap the integer class labels in a Series and preview the first entries.
y = pd.Series(data=data.target)
y.head()

0    0
1    0
2    0
3    0
4    0
dtype: int64

In [5]:
# Number of samples per class label, to check how balanced the classes are.
y.value_counts()

2    50
1    50
0    50
dtype: int64

In [6]:
# Class names that go with the integer labels stored in `y`.
data.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

# Simple Model: Predictor Only

In [7]:
# Predictor: the estimator that produces the final class prediction.
from sklearn.linear_model import LogisticRegression

# Baseline model: logistic regression fit directly on the raw (unscaled) features.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases (1.5+);
# confirm against the installed version before upgrading.
lr = LogisticRegression(max_iter=200, multi_class='auto', solver='lbfgs')
lr.fit(X, y)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=200,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [8]:
# Mean accuracy on the same X, y used for fitting: this is training accuracy,
# not a held-out estimate of generalization.
lr.score(X, y)

0.9733333333333334

# More Complicated Model: Pipeline

In [9]:
# Transformer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [10]:
# Two named steps run in order: standardize the features, then classify them.
# The classifier uses the same settings as the standalone `lr` model above.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases (1.5+);
# confirm against the installed version before upgrading.
full_model = Pipeline(steps=[
    ('transform', StandardScaler()),
    ('predict', LogisticRegression(max_iter=200, multi_class='auto', solver='lbfgs')),
])

In [11]:
# Fit the whole pipeline in one call: the scaler is fit on X, then the
# classifier is fit on the scaled features.
full_model.fit(X, y)

Pipeline(memory=None,
         steps=[('transform',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('predict',
                 LogisticRegression(C=1.0, class_weight=None, dual=False,
                                    fit_intercept=True, intercept_scaling=1,
                                    l1_ratio=None, max_iter=200,
                                    multi_class='auto', n_jobs=None,
                                    penalty='l2', random_state=None,
                                    solver='lbfgs', tol=0.0001, verbose=0,
                                    warm_start=False))],
         verbose=False)

In [12]:
# Mean accuracy of the full pipeline on the same X, y it was fit on
# (training accuracy, not a held-out estimate).
full_model.score(X, y)

0.9733333333333334