In [6]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
import pandas as pd

# Load the iris dataset as pandas objects
iris = load_iris(as_frame=True)
y = iris.target

# For demonstration, simulate a categorical feature by cutting petal length
# into three equal-width bins. `assign` returns a new DataFrame, so the frame
# held by `iris` is left unmodified instead of being mutated through an alias.
# NOTE(review): the bin edges are derived from the full dataset, i.e. before
# any train/test split; fine for a demo, but move the binning into the
# pipeline if a strictly leakage-free estimate is needed.
X = iris.data.assign(
    petal_length_category=pd.cut(
        iris.data['petal length (cm)'],
        bins=3,
        labels=["short", "medium", "long"],
    )
)

# Define column types. 'petal length (cm)' is deliberately absent from both
# lists: only its binned version is fed to the model.
numeric_features = ['sepal length (cm)', 'sepal width (cm)', 'petal width (cm)']
categorical_features = ['petal_length_category']

# Preprocessing transformers
numeric_transformer = StandardScaler()
# handle_unknown='ignore': categories not seen during fit do not raise at transform time
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

# ColumnTransformer to apply appropriate preprocessing per column group.
# remainder='drop' is the default; it is spelled out so it is obvious that
# every column not listed above (the raw petal length) is discarded.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
)



In [8]:

# Final pipeline: preprocessing followed by a seeded random forest, so the
# transformers and the estimator are always fitted together.
rf_classifier = RandomForestClassifier(random_state=42)
pipeline = Pipeline(
    steps=[
        ('preprocessing', preprocessor),
        ('classifier', rf_classifier),
    ]
)


In [10]:

# Train-test split. Stratifying on y keeps the class proportions the same in
# the training and test portions of this small dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the pipeline on the training portion only
pipeline.fit(X_train, y_train)

# Evaluate using cross-validation. cross_val_score fits clones of `pipeline`
# on each fold, so this estimate does not depend on the fit above.
scores = cross_val_score(pipeline, X, y, cv=5, scoring='accuracy')
print(f"Cross-validated accuracy: {scores.mean():.3f}")

# Score the fitted pipeline on the held-out rows; without this step the
# train/test split and the fit above would have no effect on any reported number.
test_accuracy = pipeline.score(X_test, y_test)
print(f"Held-out test accuracy: {test_accuracy:.3f}")

Cross-validated accuracy: 0.973
