# Profiling scikit-learn Pipelines with Stripje

This notebook demonstrates how to measure detailed per-step timings for a fitted scikit-learn `Pipeline` using `PipelineProfiler`.

In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, TransformerMixin

from stripje.profiling import PipelineProfiler


class SleepTransformer(TransformerMixin, BaseEstimator):
    """Transformer that sleeps before passing data through."""

    def __init__(self, sleep_seconds: float) -> None:
        self.sleep_seconds = sleep_seconds

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        import time

        time.sleep(self.sleep_seconds)
        return X

In [2]:
# Build a mixed-type dataset
rng = np.random.default_rng(42)
df = pd.DataFrame(
    {
        "age": rng.integers(18, 70, size=200),
        "income": rng.normal(55000, 15000, size=200),
        "city": rng.choice(["NY", "SF", "LA"], size=200),
        "owns_home": rng.choice(["yes", "no"], size=200),
    }
)
target = (df["income"] > 60000).astype(int)

In [3]:
# Define a pipeline with a ColumnTransformer and an estimator
numeric_features = ["age", "income"]
categorical_features = ["city", "owns_home"]

slow_numeric = Pipeline(
    [
        ("sleep", SleepTransformer(0.5)),
        ("impute", SimpleImputer(strategy="median")),
        ("scale", StandardScaler()),
    ]
)

slow_categorical = Pipeline(
    [
        ("sleep", SleepTransformer(0.3)),
        ("impute", SimpleImputer(strategy="most_frequent")),
        ("encode", OneHotEncoder(handle_unknown="ignore")),
    ]
)

preprocess = ColumnTransformer(
    transformers=[
        ("num", slow_numeric, numeric_features),
        ("cat", slow_categorical, categorical_features),
    ],
    remainder="drop",
    n_jobs=2,
)

model = Pipeline(
    [
        ("preprocess", preprocess),
        ("classifier", LogisticRegression(max_iter=500)),
    ]
)
model.fit(df, target)

In [4]:
# Run the profiler for prediction timings
profiler = PipelineProfiler(model, mode="predict", repetitions=2, warmup=1)
report = profiler.run(df)
compiled_report = profiler.run_compiled(df.iloc[0])



In [5]:
report

In [6]:
compiled_report

In [7]:
# Optional: Print a text-based summary
def print_report(report, title):
    print(title)
    print("-" * len(title))

    def recurse(node, indent=0):
        duration = node.mean_duration_display
        calls = f" ({node.call_count} calls)" if node.call_count > 1 else ""
        print(" " * indent + f"{node.name} ({node.kind}) - {duration}{calls}")
        for child in node.children:
            recurse(child, indent + 2)

    recurse(report.root)


print_report(report, "Batch Pipeline Profiling")
print()
print_report(compiled_report, "Compiled Single-Row Profiling")

Batch Pipeline Profiling
------------------------
pipeline (Pipeline) - 516.911 ms (2 calls)
  preprocess (ColumnTransformer) - 516.031 ms (2 calls)
    num (Pipeline) - 504.947 ms (2 calls)
      sleep (SleepTransformer) - 500.815 ms (2 calls)
      impute (SimpleImputer) - 3.010 ms (2 calls)
      scale (StandardScaler) - 0.500 ms (2 calls)
    cat (Pipeline) - 305.645 ms (2 calls)
      sleep (SleepTransformer) - 300.553 ms (2 calls)
      impute (SimpleImputer) - 3.023 ms (2 calls)
      encode (OneHotEncoder) - 1.416 ms (2 calls)
  classifier (LogisticRegression) - 0.774 ms (2 calls)

Compiled Single-Row Profiling
-----------------------------
compiled_pipeline (callable) - 808.517 ms
  preprocess (ColumnTransformer) - 808.179 ms
    num (Pipeline) - 505.005 ms
      sleep (SleepTransformer) - 500.981 ms
      impute (SimpleImputer) - 2.691 ms
      scale (StandardScaler) - 0.077 ms
    cat (Pipeline) - 303.070 ms
      sleep (SleepTransformer) - 300.711 ms
      impute (SimpleImp

## Next steps

- Adjust `repetitions` or `warmup` for more stable measurements.
- Use `report.to_dict()` to export results for further analysis.
- Compare different pipeline configurations by profiling each variant.

## Complex Pipeline Test

Let's test the profiler with a super complicated pipeline that has many nested levels and parallel transformers.

In [8]:
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.feature_selection import SelectKBest, f_classif, VarianceThreshold
from sklearn.preprocessing import MinMaxScaler, RobustScaler, PolynomialFeatures, Normalizer
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import Ridge

# Create a large, complex dataset
rng_complex = np.random.default_rng(123)
df_complex = pd.DataFrame({
    'num1': rng_complex.normal(100, 20, 500),
    'num2': rng_complex.normal(50, 10, 500),
    'num3': rng_complex.exponential(2, 500),
    'num4': rng_complex.uniform(0, 100, 500),
    'num5': rng_complex.normal(1000, 200, 500),
    'cat1': rng_complex.choice(['A', 'B', 'C', 'D'], 500),
    'cat2': rng_complex.choice(['X', 'Y', 'Z'], 500),
    'cat3': rng_complex.choice(['P', 'Q'], 500),
    'cat4': rng_complex.choice(['M', 'N', 'O', 'P', 'Q'], 500),
    'binary1': rng_complex.choice([0, 1], 500),
    'binary2': rng_complex.choice([0, 1], 500),
})
target_complex = (df_complex['num1'] > 100).astype(int)

# Define feature groups
numeric_basic = ['num1', 'num2']
numeric_advanced = ['num3', 'num4', 'num5']
categorical_simple = ['cat1', 'cat2']
categorical_complex = ['cat3', 'cat4']
binary_features = ['binary1', 'binary2']

In [9]:
# Build super complex nested pipelines with only computation

# Pipeline 1: Basic numeric processing with multiple stages
basic_numeric_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='mean')),
    ('scale', StandardScaler()),
    ('pca', PCA(n_components=2)),
])

# Pipeline 2: Advanced numeric with feature engineering
advanced_numeric_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='median')),
    ('robust_scale', RobustScaler()),
    ('variance', VarianceThreshold(threshold=0.1)),
    ('minmax', MinMaxScaler()),
    ('poly', PolynomialFeatures(degree=2, include_bias=False)),
])

# Pipeline 3: Simple categorical encoding
simple_cat_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='most_frequent')),
    ('encode', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ('normalize', Normalizer()),
])

# Pipeline 4: Complex categorical with multiple steps
complex_cat_pipeline = Pipeline([
    ('impute', SimpleImputer(strategy='constant', fill_value='missing')),
    ('encode', OneHotEncoder(handle_unknown='ignore', sparse_output=False)),
    ('scale', MinMaxScaler()),
    ('pca', PCA(n_components=3)),
])

# Pipeline 5: Binary features processing
binary_pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
])

# First level ColumnTransformer
preprocessing_stage1 = ColumnTransformer([
    ('basic_num', basic_numeric_pipeline, numeric_basic),
    ('advanced_num', advanced_numeric_pipeline, numeric_advanced),
    ('simple_cat', simple_cat_pipeline, categorical_simple),
    ('complex_cat', complex_cat_pipeline, categorical_complex),
    ('binary', binary_pipeline, binary_features),
], remainder='drop', n_jobs=3)

# Wrap first stage in a pipeline with post-processing
preprocessing_full = Pipeline([
    ('stage1', preprocessing_stage1),
    ('select_features', SelectKBest(f_classif, k=15)),
    ('normalize', Normalizer()),
])

# Create another ColumnTransformer that splits the preprocessed data
# This simulates a scenario where you want different models on different feature subsets
split_preprocessor = ColumnTransformer([
    ('first_half', Pipeline([
        ('pca', PCA(n_components=5)),
        ('scale', StandardScaler()),
    ]), slice(0, 8)),
    ('second_half', Pipeline([
        ('svd', TruncatedSVD(n_components=4)),
        ('minmax', MinMaxScaler()),
    ]), slice(8, 15)),
], remainder='passthrough')

# Final super complex model
super_complex_model = Pipeline([
    ('preprocessing', preprocessing_full),
    ('split_transform', split_preprocessor),
    ('final_normalize', Normalizer()),
    ('classifier', GradientBoostingClassifier(n_estimators=20, max_depth=4, random_state=42)),
])

# Fit the model
print("Fitting super complex model...")
super_complex_model.fit(df_complex, target_complex)
print("Done!")

Fitting super complex model...
Done!


  f = msb / msw


In [10]:
# Profile the super complex pipeline
print("Profiling super complex model...")
complex_profiler = PipelineProfiler(super_complex_model, mode="predict", repetitions=2, warmup=1)
complex_report = complex_profiler.run(df_complex)
complex_compiled_report = complex_profiler.run_compiled(df_complex.iloc[0])
print("Profiling complete!")

Profiling super complex model...
Profiling complete!




In [11]:
# Display the complex profiling report
complex_report

In [12]:
# Display the compiled profiling report
complex_compiled_report

In [13]:
# Print text summary of the complex pipeline
print_report(complex_report, "Super Complex Pipeline Profiling")

Super Complex Pipeline Profiling
--------------------------------
pipeline (Pipeline) - 87.935 ms (2 calls)
  preprocessing (Pipeline) - 54.102 ms (2 calls)
    stage1 (ColumnTransformer) - 51.923 ms (2 calls)
      basic_num (Pipeline) - 5.980 ms (2 calls)
        impute (SimpleImputer) - 2.774 ms (2 calls)
        scale (StandardScaler) - 0.590 ms (2 calls)
        pca (PCA) - 1.956 ms (2 calls)
      advanced_num (Pipeline) - 19.531 ms (2 calls)
        impute (SimpleImputer) - 4.863 ms (2 calls)
        robust_scale (RobustScaler) - 6.978 ms (2 calls)
        variance (VarianceThreshold) - 0.438 ms (2 calls)
        minmax (MinMaxScaler) - 1.767 ms (2 calls)
        poly (PolynomialFeatures) - 4.757 ms (2 calls)
      simple_cat (Pipeline) - 18.667 ms (2 calls)
        impute (SimpleImputer) - 6.583 ms (2 calls)
        encode (OneHotEncoder) - 8.630 ms (2 calls)
        normalize (Normalizer) - 2.846 ms (2 calls)
      complex_cat (Pipeline) - 18.579 ms (2 calls)
        impute (S

## Comparing Pipeline Reports

Compare two profiling reports to see timing differences between runs or configurations.

In [14]:
def compare_reports(report1, report2, title1="Report 1", title2="Report 2"):
    """
    Compare two profiling reports and display timing differences.
    
    Parameters:
    -----------
    report1 : ProfileReport
        First profiling report
    report2 : ProfileReport
        Second profiling report
    title1 : str
        Label for first report
    title2 : str
        Label for second report
    """
    print(f"{'Component':<40} {title1:>15} {title2:>15} {'Difference':>15} {'Change %':>12}")
    print("=" * 100)
    
    def build_paths(node, current_path=[], skip_root=True):
        """Build all paths in the tree, optionally skipping root."""
        paths = []
        if skip_root and not current_path:
            # Skip root node, start with its children
            for child in node.children:
                paths.extend(build_paths(child, [], skip_root=False))
        else:
            node_path = current_path + [node.name]
            paths.append((node_path, node))
            for child in node.children:
                paths.extend(build_paths(child, node_path, skip_root=False))
        return paths
    
    # Build paths for both reports (skipping root)
    paths1 = build_paths(report1.root)
    paths2_dict = {tuple(path): node for path, node in build_paths(report2.root)}
    
    # Compare each component
    for path, node1 in paths1:
        path_tuple = tuple(path)
        node2 = paths2_dict.get(path_tuple)
        
        # Create indented component name
        indent = "  " * (len(path) - 1)
        component_name = indent + path[-1]
        
        if node2 is not None:
            time1 = node1.mean_duration
            time2 = node2.mean_duration
            diff = time2 - time1
            
            # Calculate percentage change
            if time1 > 0:
                pct_change = (diff / time1) * 100
            else:
                pct_change = 0
            
            # Format times
            time1_str = _format_duration(time1)
            time2_str = _format_duration(time2)
            diff_str = _format_duration(abs(diff))
            
            # Add color indicator
            if diff > 0:
                indicator = "↑"  # slower
                sign = "+"
            elif diff < 0:
                indicator = "↓"  # faster
                sign = "-"
            else:
                indicator = "="  # same
                sign = ""
            
            print(f"{component_name:<40} {time1_str:>15} {time2_str:>15} {sign}{diff_str:>14} {indicator} {pct_change:>9.1f}%")
        else:
            # Component only exists in first report
            time1_str = _format_duration(node1.mean_duration)
            print(f"{component_name:<40} {time1_str:>15} {'N/A':>15} {'N/A':>15} {'N/A':>12}")
    
    # Check for components only in second report
    paths1_set = {tuple(path) for path, _ in paths1}
    for path_tuple, node2 in paths2_dict.items():
        if path_tuple not in paths1_set:
            path = list(path_tuple)
            indent = "  " * (len(path) - 1)
            component_name = indent + path[-1]
            time2_str = _format_duration(node2.mean_duration)
            print(f"{component_name:<40} {'N/A':>15} {time2_str:>15} {'N/A':>15} {'N/A':>12}")


def _format_duration(seconds: float) -> str:
    """Return a human-readable representation of elapsed time."""
    if seconds < 0:
        return f"{seconds:.3f} s"
    if seconds < 1:
        return f"{seconds * 1_000:.3f} ms"
    return f"{seconds:.3f} s"

In [15]:
# Example: Compare batch vs compiled profiling for the simple model
compare_reports(report, compiled_report, "Batch (200 rows)", "Compiled (1 row)")

Component                                Batch (200 rows) Compiled (1 row)      Difference     Change %
preprocess                                    516.031 ms      808.179 ms +    292.149 ms ↑      56.6%
  num                                         504.947 ms      505.005 ms +      0.057 ms ↑       0.0%
    sleep                                     500.815 ms      500.981 ms +      0.165 ms ↑       0.0%
    impute                                      3.010 ms        2.691 ms -      0.319 ms ↓     -10.6%
    scale                                       0.500 ms        0.077 ms -      0.424 ms ↓     -84.7%
  cat                                         305.645 ms      303.070 ms -      2.575 ms ↓      -0.8%
    sleep                                     300.553 ms      300.711 ms +      0.158 ms ↑       0.1%
    impute                                      3.023 ms        1.224 ms -      1.800 ms ↓     -59.5%
    encode                                      1.416 ms        0.072 ms -      

In [16]:
# Example: Compare batch vs compiled for the complex model
compare_reports(complex_report, complex_compiled_report, "Complex Batch", "Complex Compiled")

Component                                  Complex Batch Complex Compiled      Difference     Change %
preprocessing                                  54.102 ms        9.824 ms -     44.278 ms ↓     -81.8%
  stage1                                       51.923 ms        9.432 ms -     42.492 ms ↓     -81.8%
    basic_num                                   5.980 ms        2.575 ms -      3.405 ms ↓     -56.9%
      impute                                    2.774 ms        1.416 ms -      1.358 ms ↓     -48.9%
      scale                                     0.590 ms        0.082 ms -      0.507 ms ↓     -86.0%
      pca                                       1.956 ms        0.043 ms -      1.913 ms ↓     -97.8%
    advanced_num                               19.531 ms        2.531 ms -     17.000 ms ↓     -87.0%
      impute                                    4.863 ms        1.076 ms -      3.786 ms ↓     -77.9%
      robust_scale                              6.978 ms        0.035 ms -      6