In [1]:
from sklearn.pipeline import Pipeline
## feature Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

In [2]:
# Pipeline stages as (name, estimator) pairs, listed in execution order:
# standardise the features first, then fit the classifier.
steps = [
    ("standard_scaler", StandardScaler()),
    ("classifier", LogisticRegression()),
]

In [3]:
 # Steps are
steps

[('standard_scaler', StandardScaler()), ('classifier', LogisticRegression())]

In [8]:
# Assemble the (name, estimator) pairs into a single Pipeline object
pipe = Pipeline(steps=steps)


In [11]:
pipe

In [9]:
# Visualize the pipeline: set scikit-learn's global `display` option to "diagram"
from sklearn import set_config
set_config(display="diagram")

In [10]:
pipe

In [12]:
# Create a synthetic classification dataset with 1000 samples.
# random_state pins the generated samples so that a fresh
# "Restart Kernel -> Run All" reproduces the same data (and therefore the
# same fitted models and predictions) on every run.
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, random_state=42)

In [13]:
X.shape

(1000, 20)

In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [16]:
# Fit the whole pipeline (scaler, then classifier) on the training split only
pipe.fit(X_train , y_train)

In [17]:
# Predict class labels for the held-out test split
pipe.predict(X_test)

array([1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0])

In [19]:
## EXAMPLE 2
# Displaying a pipeline with a standard scaler, dimensionality reduction, and then an estimator
from sklearn.decomposition import PCA
from sklearn.svm import SVC

In [20]:
# Three-stage pipeline: scale -> reduce to 3 principal components -> SVC.
# NOTE: this rebinds `steps`, replacing the list used for the first pipeline.
steps = [
    ("feature_scaling", StandardScaler()),
    ("PCA", PCA(n_components=3)),
    ("SVC", SVC()),
]
pipe2 = Pipeline(steps=steps)

In [21]:
pipe2

In [23]:
# A single step can be pulled out of the pipeline by its name and used on its
# own, e.g. to apply only the scaler. Note that fit_transform also fits that
# step on X_train as a side effect.
pipe2['feature_scaling'].fit_transform(X_train)

array([[-0.1699739 ,  0.52325998,  1.121692  , ..., -0.60336653,
         0.33762556,  1.1133128 ],
       [-0.81191238,  0.56877655, -1.40051313, ..., -0.05314603,
        -0.26491555, -2.40895572],
       [-0.13056383,  0.63353698, -0.17375427, ..., -0.96370324,
        -0.26515711,  1.36519965],
       ...,
       [-0.13246799,  1.63714197, -0.85830011, ..., -0.4555948 ,
        -0.80918532,  0.2132917 ],
       [ 0.88323126,  0.1047935 ,  0.74860046, ...,  0.61603335,
        -1.0985566 ,  0.17167014],
       [ 0.37160142, -1.13009825, -2.93242296, ..., -1.26998533,
        -0.50450326,  0.41826758]], shape=(800, 20))

In [24]:
# Fit all three stages (scaling, PCA, SVC) on the training split
pipe2.fit(X_train, y_train)

In [26]:
# Predict class labels for the held-out test split with the second pipeline
pipe2.predict(X_test)

array([1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0])

#  More Complex Pipeline
# Complex example using ColumnTransformer

In [27]:
from sklearn.impute import SimpleImputer

In [29]:
# Numerical processing pipeline: replace NaNs with the column mean, then standardise
import numpy as np
numeric_processor = Pipeline(
    steps=[
        ("imputation_mean", SimpleImputer(missing_values=np.nan, strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)
numeric_processor

In [34]:
# Categorical processing pipeline: fill gaps with the constant "missing",
# then one-hot encode (categories unseen during fit are ignored at transform time)
from sklearn.preprocessing import OneHotEncoder
categorical_processor = Pipeline(
    steps=[
        ("imputation_constant", SimpleImputer(strategy="constant", fill_value="missing")),
        ("onehot", OneHotEncoder(handle_unknown="ignore")),
    ]
)

In [35]:
categorical_processor

In [32]:
# Combine the numeric and categorical pipeline
from sklearn.compose import ColumnTransformer

In [36]:
# Route each group of columns (selected by name) through its matching sub-pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("categorical", categorical_processor, ["sex", "city"]),
        ("numerical", numeric_processor, ["marks", "iq"]),
    ]
)


In [38]:
preprocessor

In [39]:
from sklearn.pipeline import make_pipeline

In [44]:
# Chain the column preprocessing with the classifier into one estimator
final_pipe = make_pipeline(
    preprocessor,
    LogisticRegression(),
)

In [45]:
final_pipe

# Hyperparameter Tuning using Pipeline

In [46]:
from sklearn.model_selection import GridSearchCV

In [51]:
import seaborn as sns
import pandas as pd
# Load the dataset from 'diabetes.csv' (relative path, so the file must be in the working directory)
df = pd.read_csv('diabetes.csv')

In [52]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
