### Scikit-Learn Pipelines


In [1]:
import numpy as np
import pandas as pd
import warnings
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error
warnings.filterwarnings("ignore")

### The Data

We will again use the automobile dataset, loaded below from `data/auto.csv`.

In [2]:
# Read the automobile data into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
auto = pd.read_csv(filepath_or_buffer='data/auto.csv')

In [3]:
# Preview the first five rows (the default for `head`) to check the columns.
auto.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,1,ford torino



### Creating a `Pipeline`


In [4]:

# Chain two named stages: expand the input into degree-2 polynomial terms,
# then fit a linear regression on the expanded features.
pipe = Pipeline(
    steps=[
        ('quad_features', PolynomialFeatures(degree=2)),
        ('quad_model', LinearRegression()),
    ]
)

# Confirm the object type and list the stages by name.
print(type(pipe))
print(pipe.named_steps)

<class 'sklearn.pipeline.Pipeline'>
{'quad_features': PolynomialFeatures(), 'quad_model': LinearRegression()}


### Fitting the Pipeline

In [5]:
# Single predictor (the double brackets keep it a two-dimensional DataFrame)
# and the target column.
X = auto[['horsepower']]
y = auto['mpg']

# Fit the quadratic pipeline, then score it on the same data it was fit on,
# so this is a training error rather than a held-out estimate.
pipe.fit(X, y)

# mean_squared_error is documented as (y_true, y_pred): observed values go
# first. The value is the same either way for MSE, but keeping the documented
# order matches the later cells and stays correct if the metric is swapped
# for an asymmetric one.
quad_pipe_mse = float(mean_squared_error(y, pipe.predict(X)))

print(type(quad_pipe_mse))
print(quad_pipe_mse)

<class 'float'>
18.98476890761722



### Examining the Coefficients

In [6]:
# Pull the fitted regression stage out of the pipeline by its step name.
quad_reg = pipe['quad_model']

# One coefficient per column produced by the polynomial stage. The leading
# entry belongs to the constant (bias) column that PolynomialFeatures adds by
# default; the regression's own intercept is stored separately.
coefs = quad_reg.coef_

print(type(quad_reg))
print(coefs)

<class 'sklearn.linear_model._base.LinearRegression'>
[ 0.         -0.46618963  0.00123054]


### Considering the Bias 

In [7]:
# Same quadratic model, but the polynomial stage no longer emits the constant
# column (include_bias=False).
pipe_no_bias = Pipeline(
    steps=[
        ('quad_features', PolynomialFeatures(degree=2, include_bias=False)),
        ('quad_model', LinearRegression()),
    ]
)

X = auto[['horsepower']]
y = auto['mpg']

# Fit first, then score the in-sample predictions against the observed mpg.
pipe_no_bias.fit(X, y)
no_bias_mse = mean_squared_error(y, pipe_no_bias.predict(X))

print(type(pipe_no_bias))
print(no_bias_mse)

<class 'sklearn.pipeline.Pipeline'>
18.984768907617216


### Building a Cubic Model with `Pipeline`

In [8]:

# Degree-3 version of the bias-free pipeline.
# NOTE(review): the step names still read "quad_*" although this pipeline is
# cubic; they are left unchanged so anything addressing the steps by name
# keeps working.
cubic_pipe = Pipeline(
    steps=[
        ('quad_features', PolynomialFeatures(degree=3, include_bias=False)),
        ('quad_model', LinearRegression()),
    ]
)

X = auto[['horsepower']]
y = auto['mpg']

# In-sample mean squared error of the cubic fit.
cubic_pipe.fit(X, y)
cubic_mse = mean_squared_error(y, cubic_pipe.predict(X))

print(type(cubic_pipe))


<class 'sklearn.pipeline.Pipeline'>


### Making Predictions on New Data


In [9]:

# cubic_pipe was already fit on (X, y) when cubic_mse was computed, so it is
# reused here instead of being refit on the same data.
#
# The pipeline was fit on a DataFrame, so the new observation is passed as a
# one-row DataFrame carrying the same column label(s). A bare [[200]] has no
# feature names and makes scikit-learn emit a feature-name warning, which the
# blanket warnings filter in the import cell would otherwise hide.
new_hp = pd.DataFrame([[200]], columns=X.columns)
cube_predict = cubic_pipe.predict(new_hp)  # predicted mpg at horsepower = 200

print(type(cube_predict))
print(cube_predict)

<class 'numpy.ndarray'>
[12.90220247]
