<div style="padding:30px 0px;">
    <h1 align="center" style="padding:50px">Introduction to Pipelines</h1>
    <p align="center" style="font-size:small;">Seth Pruitt<br>spruitt@norstal.com<br>www.github.com/faradical</p>
</div>

In [1]:
# Import Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

## Generating Synthetic Data

In [2]:
# Generate a random regression dataset.
# random_state is fixed so that Restart & Run All reproduces the same data
# (and therefore the same downstream score) on every run.
X, y = make_regression(n_samples=1000, n_features=10, noise=100, random_state=42)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
pd.DataFrame(X_train).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.771956,-1.011598,2.045661,-0.557215,0.078445,0.38245,-0.900398,1.248052,0.028342,-0.209263
1,-0.611959,0.9585,-0.56344,0.213837,-1.289285,1.596129,0.083678,0.493285,1.805893,1.211042
2,1.318432,0.247222,-0.060595,1.269384,1.115085,0.311037,-0.204427,-0.498243,-0.097647,0.598556
3,-0.143268,-1.134549,0.503527,-0.582122,-1.692,0.987057,0.612473,0.15715,-1.63687,-0.694953
4,-0.029276,0.699258,0.565877,-0.044743,0.453428,-0.702453,0.925155,-0.745698,-0.824095,1.270423


## Creating an Otherwise Unnecessary Function For Demo Purposes

In [3]:
# Fit the scaler once, on the training split only; pipe_scaler below reuses
# this already-fitted object and never re-fits it.
scaler = StandardScaler()
scaler.fit(X_train)


def pipe_scaler(X):
    """Standardize ``X`` with the module-level ``scaler`` fitted on ``X_train``.

    Parameters
    ----------
    X : array-like
        Feature matrix to transform; passed straight to ``scaler.transform``.

    Returns
    -------
    The output of ``scaler.transform(X)``.
    """
    scaled = scaler.transform(X)
    return scaled


pd.DataFrame(pipe_scaler(X_train)).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.802033,-0.996948,1.963333,-0.613025,0.044991,0.439726,-0.85436,1.210124,-0.00112,-0.190335
1,-0.636596,0.97912,-0.612604,0.19275,-1.381577,1.693704,0.105697,0.476394,1.771889,1.227012
2,1.359422,0.265686,-0.116151,1.295831,1.126228,0.365943,-0.175376,-0.4875,-0.126787,0.615801
3,-0.151972,-1.120272,0.440801,-0.639054,-1.801616,1.064409,0.621586,0.149627,-1.662077,-0.675014
4,-0.034104,0.719092,0.502358,-0.077475,0.436106,-0.681198,0.926636,-0.728058,-0.85138,1.286269


## Creating Our First Pipeline

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# A Pipeline is an ordered list of (name, step) pairs:
#   - "Scaler" is a transformer (.fit + .transform) wrapping pipe_scaler
#   - "Linear Regression" is the final estimator (.fit + .predict)
p1 = Pipeline(
    steps=[
        ("Scaler", FunctionTransformer(pipe_scaler)),
        ("Linear Regression", LinearRegression()),
    ]
)

p1

## Evaluating Our Piped Model

In [5]:
# fit() returns the fitted pipeline, so training and scoring can be chained.
# score() on the held-out split is the value displayed for this cell.
p1.fit(X_train, y_train).score(X_test, y_test)

0.8075032814855791