In [3]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd

class DataFrameCleaner(BaseEstimator, TransformerMixin):
    """
    Scikit-learn compatible transformer that replaces missing values with a constant.

    The transformer is stateless: ``fit`` learns nothing, and ``transform``
    returns a new object with every missing entry replaced by ``fill_value``.
    """

    def __init__(self, fill_value=0):
        """
        Initialize the DataFrameCleaner.

        :param fill_value: Value to fill missing data with (default is 0)
        """
        # Stored under the same name as the constructor argument so that the
        # inherited get_params / set_params / clone machinery can find it.
        self.fill_value = fill_value

    def fit(self, X, y=None):
        """
        Fit method. Required for pipeline compatibility; nothing is learned.

        :param X: Data to fit (ignored)
        :param y: Target variable (ignored)
        :return: self
        """
        return self

    def transform(self, X):
        """
        Replace missing values in ``X`` with ``self.fill_value``.

        :param X: DataFrame or Series to clean. Any other array-like input
            (e.g. the NumPy array produced by an upstream pipeline step) is
            first wrapped in a DataFrame.
        :return: New DataFrame (or Series, if a Series was passed) with the
            missing values filled; the input object is not modified
        """
        # Non-pandas inputs have no ``fillna``; coerce them instead of failing
        # with an AttributeError.
        if not isinstance(X, (pd.DataFrame, pd.Series)):
            X = pd.DataFrame(X)

        # Non-inplace ``fillna`` already returns a new object, so no explicit
        # copy is needed to protect the caller's data.
        return X.fillna(self.fill_value)

In [4]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Small demo frame: every column contains at least one missing entry
data = dict(
    A=[1, 2, None, 4],
    B=[None, 2, 3, 4],
    C=[1, None, None, 4],
)
df = pd.DataFrame(data)

# Chain the custom cleaner with a standard scaler; steps run in the order listed
pipeline = Pipeline(
    [
        ('cleaning', DataFrameCleaner(fill_value=0)),  # fill the gaps first
        ('scaling', StandardScaler()),  # then standardize each column
    ]
)

# Fit all steps on the demo frame and transform it in a single call
cleaned_data = pipeline.fit_transform(df)
print("Data after pipeline processing:", cleaned_data, sep="\n")

Data after pipeline processing:
[[-0.50709255 -1.52127766 -0.15249857]
 [ 0.16903085 -0.16903085 -0.76249285]
 [-1.18321596  0.50709255 -0.76249285]
 [ 1.52127766  1.18321596  1.67748427]]
