In [3]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split


In [4]:
from sklearn.datasets import load_iris

# Load the bundled iris dataset, then pull the feature array and the target
# array out of the returned container under the conventional X / y names.
data = load_iris()
X = data.data
y = data.target

In [5]:
# Ordered (name, transformer) pairs for the preprocessing stage:
# mean-imputation runs first, standard scaling second.
preprocessing_steps = [
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler()),
]

In [6]:
# Wrap the step list in a Pipeline so imputation and scaling act as one transformer.
preprocessing_pipeline = Pipeline(steps=preprocessing_steps)

In [7]:
from sklearn.linear_model import LogisticRegression

In [8]:
# End-to-end estimator: the preprocessing pipeline is nested as the first step
# and a default-configured logistic regression is the final classifier.
model_pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessing_pipeline),
        ("classifier", LogisticRegression()),
    ]
)

In [9]:
# Hold out 20% of the samples for the final evaluation; the fixed
# random_state keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
from sklearn.model_selection import cross_val_score

# Cross-validate the whole pipeline on the training split only, so the
# held-out test split plays no part in this estimate.
scores = cross_val_score(
    estimator=model_pipeline,
    X=X_train,
    y=y_train,
    cv=5,
)
print("Cross-validation scores:", scores)
print("Mean cross-validation score: {:.2f}".format(np.mean(scores)))

Cross-validation scores: [0.95833333 1.         0.875      1.         0.95833333]
Mean cross-validation score: 0.96


In [11]:
# Fit on the full training split, then score the fitted pipeline on the
# held-out test split (fit returns the pipeline itself, so the calls chain).
test_score = model_pipeline.fit(X_train, y_train).score(X_test, y_test)
print("Test set score: {:.2f}".format(test_score))

Test set score: 1.00
