# Demo: Understanding Transfory's Exceptions

This notebook demonstrates the various custom exceptions `Transfory` raises to provide clear and actionable feedback when issues arise during data preprocessing. Understanding these exceptions will help you debug your pipelines more effectively.

In [1]:
import os
import sys

import numpy as np
import pandas as pd

# In a Jupyter notebook, __file__ is not defined, so the project root is
# resolved relative to the current working directory instead.
# This assumes the notebook is run from a folder one level below the project
# root, with the 'transfory' package located in the parent directory.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    # Prepend so the local checkout is found before any other 'transfory' on the path.
    sys.path.insert(0, project_root)

from transfory import (
    Pipeline,
    ColumnTransformer,
    MissingValueHandler,
    Encoder,
    Scaler,
    OutlierHandler,
    DatetimeFeatureExtractor,
    FeatureGenerator,
    InsightReporter,
    BaseTransformer
)
from transfory.exceptions import (
    TransforyError,
    InvalidStepError,
    NotFittedError,
    FrozenTransformerError,
    ConfigurationError,
    ColumnMismatchError,
    NoApplicableColumnsError,
    PipelineProcessingError,
    PipelineLogicError,
)

print("Transfory modules and exceptions loaded successfully!")

Transfory modules and exceptions loaded successfully!


### Helper Function to Display Errors

We'll use a simple `try-except` block to catch and display the custom exceptions, making the output clean and readable.

In [2]:
def demonstrate_error(func, error_type):
    """Run ``func`` and print the exception it raises, if any.

    Args:
        func: Zero-argument callable that is expected to raise ``error_type``.
        error_type: Exception class the demonstration is meant to trigger.

    Prints a header naming ``error_type`` followed by exactly one outcome:
    the expected exception with its message, an unexpected ``Exception``
    with its type and message, or a notice that nothing was raised.
    Returns ``None``.
    """
    expected_name = error_type.__name__
    print(f"\n--- Demonstrating {expected_name} ---")
    try:
        func()
    except error_type as e:
        print(f"Caught expected {expected_name}:\n{e}")
    except Exception as e:
        # Deliberately broad: a demo that fails in an unplanned way should be
        # reported in the output rather than abort the rest of the notebook.
        print(f"Caught unexpected error: {type(e).__name__}:\n{e}")
    else:
        # Without this branch a demo that stops raising would print only the
        # header, which is easy to misread as a successful demonstration.
        print(f"No exception was raised; expected {expected_name}.")

## 1. `ConfigurationError`

Raised when a transformer is initialized with invalid or conflicting parameters.

In [3]:
def trigger_configuration_error_missing_value_handler():
    """Construct a MissingValueHandler with strategy='constant' and no fill_value."""
    print("Attempting to create MissingValueHandler with strategy='constant' but no fill_value...")
    MissingValueHandler(strategy="constant")

def trigger_configuration_error_scaler():
    """Construct a Scaler with the method name 'invalid_scaling_method'."""
    print("Attempting to create Scaler with an unsupported method...")
    Scaler(method="invalid_scaling_method")

def trigger_configuration_error_encoder():
    """Construct an Encoder with the method name 'unsupported_encoding'."""
    print("Attempting to create Encoder with an unsupported method...")
    Encoder(method="unsupported_encoding")

def trigger_configuration_error_outlier_handler():
    """Construct a percentile OutlierHandler whose lower quantile (0.5) exceeds its upper quantile (0.1)."""
    print("Attempting to create OutlierHandler with invalid percentile values...")
    OutlierHandler(method="percentile", lower_quantile=0.5, upper_quantile=0.1)

# Run each invalid-configuration scenario, in order, through the shared helper.
for config_trigger in (
    trigger_configuration_error_missing_value_handler,
    trigger_configuration_error_scaler,
    trigger_configuration_error_encoder,
    trigger_configuration_error_outlier_handler,
):
    demonstrate_error(config_trigger, ConfigurationError)


--- Demonstrating ConfigurationError ---
Attempting to create MissingValueHandler with strategy='constant' but no fill_value...
Caught expected ConfigurationError:
`fill_value` must be provided when strategy is 'constant'.

--- Demonstrating ConfigurationError ---
Attempting to create Scaler with an unsupported method...
Caught expected ConfigurationError:
Method 'invalid_scaling_method' is not supported. Available methods: ['minmax', 'zscore']

--- Demonstrating ConfigurationError ---
Attempting to create Encoder with an unsupported method...
Caught expected ConfigurationError:
Method 'unsupported_encoding' is not supported. Use one of ['label', 'onehot'].

--- Demonstrating ConfigurationError ---
Attempting to create OutlierHandler with invalid percentile values...
Caught expected ConfigurationError:
Percentiles must be between 0 and 1, and lower_quantile must be less than upper_quantile.


## 2. `NoApplicableColumnsError`

Raised when a transformer is applied to a DataFrame but finds no columns it can operate on (e.g., a `Scaler` on a DataFrame with no numeric columns).

In [4]:
# Fixtures: one frame holding only string columns, one holding only integer
# columns, and an empty frame (not referenced by the triggers in this cell).
df_text_only = pd.DataFrame({"col1": ["A", "B"], "col2": ["X", "Y"]})
df_numeric_only = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
df_empty = pd.DataFrame()

def trigger_no_applicable_columns_error_scaler():
    """Fit a default Scaler on the string-only frame."""
    print("Attempting to fit Scaler on a DataFrame with no numeric columns...")
    Scaler().fit(df_text_only)

def trigger_no_applicable_columns_error_encoder():
    """Fit a default Encoder on the integer-only frame."""
    print("Attempting to fit Encoder on a DataFrame with no categorical columns...")
    Encoder().fit(df_numeric_only)

def trigger_no_applicable_columns_error_outlier_handler():
    """Fit a default OutlierHandler on the string-only frame."""
    print("Attempting to fit OutlierHandler on a DataFrame with no numeric columns...")
    OutlierHandler().fit(df_text_only)

def trigger_no_applicable_columns_error_datetime_extractor():
    """Fit a default DatetimeFeatureExtractor on the integer-only frame."""
    print("Attempting to fit DatetimeFeatureExtractor on a DataFrame with no datetime columns...")
    DatetimeFeatureExtractor().fit(df_numeric_only)

def trigger_no_applicable_columns_error_feature_generator():
    """Fit a default FeatureGenerator on the string-only frame."""
    print("Attempting to fit FeatureGenerator on a DataFrame with no numeric columns...")
    FeatureGenerator().fit(df_text_only)

# Run each "nothing to operate on" scenario, in order, through the shared helper.
for no_columns_trigger in (
    trigger_no_applicable_columns_error_scaler,
    trigger_no_applicable_columns_error_encoder,
    trigger_no_applicable_columns_error_outlier_handler,
    trigger_no_applicable_columns_error_datetime_extractor,
    trigger_no_applicable_columns_error_feature_generator,
):
    demonstrate_error(no_columns_trigger, NoApplicableColumnsError)


--- Demonstrating NoApplicableColumnsError ---
Attempting to fit Scaler on a DataFrame with no numeric columns...
Caught expected NoApplicableColumnsError:
Scaler found no numeric columns to scale in the provided DataFrame. Columns available: ['col1', 'col2']

--- Demonstrating NoApplicableColumnsError ---
Attempting to fit Encoder on a DataFrame with no categorical columns...
Caught expected NoApplicableColumnsError:
Encoder found no 'object' or 'category' columns to encode. Columns available: ['col1', 'col2']

--- Demonstrating NoApplicableColumnsError ---
Attempting to fit OutlierHandler on a DataFrame with no numeric columns...
Caught expected NoApplicableColumnsError:
OutlierHandler found no numeric columns to process. Columns available: ['col1', 'col2']

--- Demonstrating NoApplicableColumnsError ---
Attempting to fit DatetimeFeatureExtractor on a DataFrame with no datetime columns...
Caught expected NoApplicableColumnsError:
DatetimeFeatureExtractor found no convertible datetim… [output truncated]

## 3. `PipelineLogicError`

Raised when the order of transformers in a `Pipeline` is likely to cause unintended or incorrect behavior (e.g., scaling before encoding).

In [5]:
def trigger_pipeline_logic_error():
    """Construct a Pipeline whose Scaler step is listed ahead of its Encoder step."""
    print("Attempting to create a Pipeline with Scaler before Encoder...")
    misordered_steps = [
        ("scaler", Scaler()),
        ("encoder", Encoder()),
    ]
    Pipeline(steps=misordered_steps)

demonstrate_error(trigger_pipeline_logic_error, PipelineLogicError)


--- Demonstrating PipelineLogicError ---
Attempting to create a Pipeline with Scaler before Encoder...
Caught expected PipelineLogicError:
Logical order error: Step 'scaler' (Scaler) appears before step 'encoder' (Encoder). Numeric transformers should run AFTER categorical encoding.


## 4. `InvalidStepError`

Raised when a step in a `Pipeline` or `ColumnTransformer` is not a valid transformer (i.e., it doesn't have `fit` and `transform` methods and is not one of the accepted string keywords such as `'passthrough'`).

In [6]:
def trigger_invalid_step_error_pipeline():
    """Construct a Pipeline in which one step is the integer 123 instead of a transformer."""
    print("Attempting to create a Pipeline with an invalid step (an integer)...")
    steps_with_integer = [
        ("valid_step", Scaler()),
        ("invalid_step", 123),  # an int, so it has no fit/transform
    ]
    Pipeline(steps=steps_with_integer)

def trigger_invalid_step_error_column_transformer():
    """Construct a ColumnTransformer in which one sub-transformer is an arbitrary string."""
    print("Attempting to create a ColumnTransformer with an invalid sub-transformer...")
    transformers_with_string = [
        ("numeric", Scaler(), ['col1']),
        ("invalid", "not_a_transformer", ['col2']),  # a plain string, not a transformer object
    ]
    ColumnTransformer(transformers=transformers_with_string)

# Both scenarios are checked against the same exception type.
for invalid_step_trigger in (
    trigger_invalid_step_error_pipeline,
    trigger_invalid_step_error_column_transformer,
):
    demonstrate_error(invalid_step_trigger, InvalidStepError)


--- Demonstrating InvalidStepError ---
Attempting to create a Pipeline with an invalid step (an integer)...
Caught expected InvalidStepError:
All steps in a pipeline must be transformers with 'fit' and 'transform' methods, or the string 'passthrough'. Step 'invalid_step' is of type int which is not a valid transformer.

--- Demonstrating InvalidStepError ---
Attempting to create a ColumnTransformer with an invalid sub-transformer...
Caught expected InvalidStepError:
Transformer 'invalid' is not valid. It must be an object with 'fit' and 'transform' methods, or one of 'passthrough', 'drop'.


## 5. `ColumnMismatchError`

Raised when a transformer is asked to `transform` data that is missing columns it was fitted on.

In [7]:
# Fitting frame has columns A and B; the frame passed to transform has only A.
df_fit = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
df_transform_missing = pd.DataFrame({"A": [7, 8, 9]})

def trigger_column_mismatch_error():
    """Fit a Scaler on ``df_fit``, then transform a frame that lacks column 'B'."""
    print("Attempting to transform data with missing columns...")
    fitted_scaler = Scaler()
    fitted_scaler.fit(df_fit)
    fitted_scaler.transform(df_transform_missing)

demonstrate_error(trigger_column_mismatch_error, ColumnMismatchError)


--- Demonstrating ColumnMismatchError ---
Attempting to transform data with missing columns...
Caught expected ColumnMismatchError:
Missing columns for Scaler(method='minmax'). Transformer was fitted on ['A', 'B'], but the following columns are missing from the input: ['B'].


## 6. `NotFittedError`

Raised when `transform` is called on a transformer that has not yet been `fit`.

In [8]:
# Single-column numeric frame handed to the unfitted transformer.
df_sample = pd.DataFrame({"A": [1, 2, 3]})

def trigger_not_fitted_error():
    """Call transform on a freshly constructed Scaler without fitting it first."""
    print("Attempting to transform with an unfitted Scaler...")
    unfitted_scaler = Scaler()
    unfitted_scaler.transform(df_sample)

demonstrate_error(trigger_not_fitted_error, NotFittedError)


--- Demonstrating NotFittedError ---
Attempting to transform with an unfitted Scaler...
Caught expected NotFittedError:
Transformer Scaler(method='minmax') is not fitted. Call .fit() first.


## 7. `FrozenTransformerError`

Raised when `fit` is called on a transformer that has been explicitly frozen with `freeze()`.

In [9]:
# Single-column numeric frame used for both the first and the second fit.
df_sample = pd.DataFrame({"A": [1, 2, 3]})

def trigger_frozen_transformer_error():
    """Fit a Scaler, freeze it, then call fit on it a second time."""
    print("Attempting to refit a frozen Scaler...")
    frozen_scaler = Scaler()
    frozen_scaler.fit(df_sample)
    frozen_scaler.freeze()
    # This second fit is the call that is expected to be rejected.
    frozen_scaler.fit(df_sample)

demonstrate_error(trigger_frozen_transformer_error, FrozenTransformerError)


--- Demonstrating FrozenTransformerError ---
Attempting to refit a frozen Scaler...
Caught expected FrozenTransformerError:
Transformer Scaler(method='minmax') is frozen and cannot be refit.


## 8. `PipelineProcessingError`

Raised when an error occurs during the execution of a specific step within a `Pipeline` or `ColumnTransformer`. This wraps the original error, providing context about which step failed.

In [10]:
# A stand-in transformer whose transform step raises unconditionally.
class FailingTransformer(BaseTransformer):
    """Dummy transformer: ``_fit`` does nothing and ``_transform`` always raises ValueError."""

    def _fit(self, X, y=None):
        """Intentionally a no-op; nothing is learned from ``X`` or ``y``."""
        return None

    def _transform(self, X):
        """Raise ValueError regardless of the input."""
        raise ValueError("This transformer is designed to fail!")

df_sample = pd.DataFrame({"A": [1, 2, 3]})

def trigger_pipeline_processing_error_pipeline():
    """Run fit_transform on a Pipeline whose second step is a FailingTransformer."""
    print("Attempting to run a Pipeline with a failing step...")
    steps_with_failure = [
        ("good_step", Scaler()),
        ("bad_step", FailingTransformer()),
    ]
    failing_pipeline = Pipeline(steps=steps_with_failure)
    failing_pipeline.fit_transform(df_sample)

def trigger_pipeline_processing_error_column_transformer():
    """Run fit_transform on a ColumnTransformer that routes column 'B' to a FailingTransformer."""
    print("Attempting to run a ColumnTransformer with a failing sub-transformer...")
    df_mixed = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
    transformers_with_failure = [
        ("good_numeric", Scaler(), ['A']),
        ("bad_numeric", FailingTransformer(), ['B']),  # the sub-transformer that raises
    ]
    failing_ct = ColumnTransformer(transformers=transformers_with_failure)
    failing_ct.fit_transform(df_mixed)

# Both scenarios are checked against the same wrapping exception type.
for processing_trigger in (
    trigger_pipeline_processing_error_pipeline,
    trigger_pipeline_processing_error_column_transformer,
):
    demonstrate_error(processing_trigger, PipelineProcessingError)


--- Demonstrating PipelineProcessingError ---
Attempting to run a Pipeline with a failing step...
Caught expected PipelineProcessingError:
Error during 'fit_transform' in step 'bad_step' (FailingTransformer): This transformer is designed to fail!

--- Demonstrating PipelineProcessingError ---
Attempting to run a ColumnTransformer with a failing sub-transformer...
Caught expected PipelineProcessingError:
Error during 'transform' in ColumnTransformer step 'bad_numeric': This transformer is designed to fail!
