In [2]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.datasets import load_iris
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

# Load the Iris dataset into a DataFrame: one column per feature, plus the
# class label in 'target'.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target

# Introduce some missing values for demonstration purposes, so the imputer
# below has something to fill in.
df.loc[0, 'sepal length (cm)'] = np.nan
df.loc[10, 'sepal width (cm)'] = np.nan

# Separate features and target variable
X = df.drop('target', axis=1)
y = df['target']

# Preprocessing for numerical features: replace NaNs with the column mean,
# then standardize each column.
numerical_features = X.columns
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Degree-2 polynomial expansion (original columns, squares and pairwise
# products; no constant bias column).
polynomial_transformer = PolynomialFeatures(degree=2, include_bias=False)

# Route every feature column through the numerical preprocessing. All
# columns are numeric here, so a single transformer covers them and the
# column order of X is preserved in the output.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
    ])

# Full pipeline: impute + scale first, then expand into polynomial features.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('poly', polynomial_transformer),
])

X_preprocessed = pipeline.fit_transform(X)

# Convert preprocessed data back to a DataFrame for better understanding.
# Read the feature names from the step held by the fitted pipeline rather
# than from the standalone variable: the pipeline's own step is the one
# guaranteed to be fitted (e.g. if step caching makes the pipeline clone its
# transformers). The poly step receives a plain array from the
# ColumnTransformer, so the original column names are passed in explicitly.
fitted_poly = pipeline.named_steps['poly']
feature_names = fitted_poly.get_feature_names_out(numerical_features)

# Keep X's row index so the preprocessed frame stays aligned with y.
X_preprocessed_df = pd.DataFrame(
    X_preprocessed, columns=feature_names, index=X.index
)

print(X_preprocessed_df.head())


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0           0.000000          1.036574          -1.340227         -1.315444   
1          -1.152203         -0.122957          -1.340227         -1.315444   
2          -1.395201          0.340856          -1.397064         -1.315444   
3          -1.516700          0.108949          -1.283389         -1.315444   
4          -1.030704          1.268481          -1.340227         -1.315444   

   sepal length (cm)^2  sepal length (cm) sepal width (cm)  \
0             0.000000                            0.000000   
1             1.327571                            0.141671   
2             1.946585                           -0.475562   
3             2.300378                           -0.165243   
4             1.062350                           -1.307427   

   sepal length (cm) petal length (cm)  sepal length (cm) petal width (cm)  \
0                            -0.000000                           -0.000000