This notebook explains the role of `fit`, `transform`, and `fit_transform` within `Pipeline`.

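A `Pipeline` abstracts a sequence of data-processing steps behind a single estimator interface, so the whole chain can be fitted and applied with one call.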

In [3]:
# Import necessary libraries
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


# Custom Transformer to print debug messages
class DebugTransformer(BaseEstimator, TransformerMixin):
    """Pass-through transformer that announces each estimator-API call.

    Placed between the real steps of a ``Pipeline``, it prints one line
    whenever ``fit``, ``transform`` or ``fit_transform`` is invoked on it,
    which makes the order in which the pipeline drives its steps visible.
    The data itself is returned unmodified.

    Parameters
    ----------
    name : str, default=""
        Label printed in square brackets at the start of every message.

    Attributes
    ----------
    is_fitted_ : bool
        Set to ``True`` by ``fit``. Nothing is learned from the data; the
        attribute exists only so that scikit-learn's fitted-state check
        (which looks for attributes ending in an underscore) recognises the
        instance as fitted.
    """

    def __init__(self, name=""):
        self.name = name

    def fit(self, X, y=None):
        """Report the call, mark the instance as fitted and return ``self``.

        ``X`` and ``y`` are accepted for API compatibility and are not read.
        """
        print(f"\n[{self.name}] fit is called")
        # A Pipeline decides whether it is fitted by checking its last step.
        # When this transformer is the last step (e.g. a pipeline sliced with
        # ``[:-1]``), a missing fitted attribute makes the whole pipeline look
        # unfitted, which recent scikit-learn releases warn about.
        self.is_fitted_ = True
        return self

    def transform(self, X):
        """Report the call and return ``X`` unchanged."""
        print(f"[{self.name}] transform is called")
        return X

    def fit_transform(self, X, y=None):
        """Report the call, then delegate to ``fit`` followed by ``transform``.

        Overridden (rather than inherited from ``TransformerMixin``) only so
        that the ``fit_transform`` entry point gets its own message; the
        nested ``fit`` and ``transform`` messages are printed as well.
        """
        print(f"[{self.name}] fit_transform is called")
        return self.fit(X, y).transform(X)


# Fetch the Iris bunch and pull out the feature matrix and the label vector
data = load_iris()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Assemble the steps: a DebugTransformer sits in front of every real step so
# its messages show when the pipeline reaches that position in the chain
pipeline_steps = [
    ("debug1", DebugTransformer(name="Step 1: Debug")),
    ("scaler", StandardScaler()),
    ("debug2", DebugTransformer(name="Step 2: Debug")),
    ("pca", PCA(n_components=2)),
    ("debug3", DebugTransformer(name="Step 3: Debug")),
    ("classifier", LogisticRegression()),
]
pipeline = Pipeline(pipeline_steps)

# 1) fit: trace which methods the pipeline calls on each step while training
print("=== Fitting the Pipeline ===")
pipeline.fit(X_train, y_train)

# Everything except the final classifier. Per the scikit-learn docs, slicing a
# Pipeline gives a shallow copy, so these are the very same step objects that
# `pipeline` holds (fitting them here also affects `pipeline`).
transformers_only = pipeline[:-1]

# 2) transform: push the training data through the already-fitted transformers
print("\n=== Transforming the Pipeline ===")
X_train_transformed = transformers_only.transform(X_train)

# 3) fit_transform: fit the transformers again and transform in a single pass
print("\n=== Fit-Transforming the Pipeline ===")
X_train_fit_transformed = transformers_only.fit_transform(X_train, y_train)

# 4) predict: the test data goes through every transformer, then the classifier
print("\n=== Predicting with the Pipeline ===")
y_pred = pipeline.predict(X_test)

# Show the predicted class labels under their own heading
print("\n=== Final Predictions ===", y_pred, sep="\n")

=== Fitting the Pipeline ===
[Step 1: Debug] fit_transform is called

[Step 1: Debug] fit is called
[Step 1: Debug] transform is called
[Step 2: Debug] fit_transform is called

[Step 2: Debug] fit is called
[Step 2: Debug] transform is called
[Step 3: Debug] fit_transform is called

[Step 3: Debug] fit is called
[Step 3: Debug] transform is called

=== Transforming the Pipeline ===
[Step 1: Debug] transform is called
[Step 2: Debug] transform is called
[Step 3: Debug] transform is called

=== Fit-Transforming the Pipeline ===
[Step 1: Debug] fit_transform is called

[Step 1: Debug] fit is called
[Step 1: Debug] transform is called
[Step 2: Debug] fit_transform is called

[Step 2: Debug] fit is called
[Step 2: Debug] transform is called
[Step 3: Debug] fit_transform is called

[Step 3: Debug] fit is called
[Step 3: Debug] transform is called

=== Predicting with the Pipeline ===
[Step 1: Debug] transform is called
[Step 2: Debug] transform is called
[Step 3: Debug] transform is called



