# Video: Using Scikit-Learn Pipelines

This video shows an example using scikit-learn pipelines to combine the input preprocessing with a model.

In [None]:
import pandas as pd

In [None]:
abalone = pd.read_csv("https://raw.githubusercontent.com/bu-cds-omds/dx602-examples/main/data/abalone.tsv", sep="\t")
abalone.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [None]:
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
abalone_inputs = abalone.drop(["Sex", "Rings"], axis=1)
abalone_inputs.head()

Unnamed: 0,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055


In [None]:
abalone_target = abalone["Rings"]

In [None]:
pipeline = make_pipeline(StandardScaler(), PCA(), LinearRegression())
pipeline

In [None]:
pipeline.fit(abalone_inputs, abalone_target)

In [None]:
pipeline.score(abalone_inputs, abalone_target)

0.5276299399919837

In [None]:
pipeline.predict(abalone_inputs.head())

array([ 8.77893882,  7.23758471, 10.84589582,  9.27281493,  6.98954335])