In [1]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Demonstration: one-hot encode a single categorical column with a
# scikit-learn ColumnTransformer wrapped in a Pipeline, then display the
# encoded result as a DataFrame with readable column names.
import pandas as pd

# Sample data with a categorical variable 'Color'
data = {'Color': ['Red', 'Green', 'Blue', 'Red', 'Blue']}

# Create a DataFrame
df = pd.DataFrame(data)

# Define the columns to be one-hot encoded
categorical_cols = ['Color']

# Create a ColumnTransformer to apply one-hot encoding to the specified columns.
# OneHotEncoder produces a sparse matrix by default, and ColumnTransformer only
# converts it to a dense array when the overall density is >= sparse_threshold
# (default 0.3). With three categories the density is 1/3, so the default
# happens to yield a dense array; with four or more distinct categories it
# would yield a scipy sparse matrix and the DataFrame construction below would
# no longer work. sparse_threshold=0 always returns a dense array.
preprocessor = ColumnTransformer(
    transformers=[
        ('onehot', OneHotEncoder(), categorical_cols),
    ],
    sparse_threshold=0,
)

# Create a pipeline with the specified preprocessor
pipeline = Pipeline(steps=[('preprocessor', preprocessor)])

# Fit and transform the data using the pipeline
transformed_data = pipeline.fit_transform(df)

# Look up the fitted encoder inside the pipeline to recover the generated
# column names (one '<column>_<category>' name per encoded category).
fitted_preprocessor = pipeline.named_steps['preprocessor']
fitted_encoder = fitted_preprocessor.named_transformers_['onehot']
encoded_columns = fitted_encoder.get_feature_names_out(
    input_features=categorical_cols
)

# Convert the transformed data back to a DataFrame for visualization
transformed_df = pd.DataFrame(transformed_data, columns=encoded_columns)

# Display the original and transformed data
print("Original Data:")
print(df)
print("\nTransformed Data:")
print(transformed_df)


Original Data:
   Color
0    Red
1  Green
2   Blue
3    Red
4   Blue

Transformed Data:
   Color_Blue  Color_Green  Color_Red
0         0.0          0.0        1.0
1         0.0          1.0        0.0
2         1.0          0.0        0.0
3         0.0          0.0        1.0
4         1.0          0.0        0.0
