### Pipelines, actions and functions


---


In [17]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC


# Custom Pipeline class
class CustomPipeline:
    """Minimal sequential pipeline built from named step functions.

    Each step is a ``(name, function)`` tuple and every function is called as
    ``function(X, y)``. A step named ``"train"`` is expected to return the
    trained model; every other step is expected to return an ``(X, y)`` tuple
    that is handed to the next step.

    The pipeline keeps no per-step state: ``transform`` simply calls the same
    step functions again on the data it is given.
    """

    def __init__(self, steps):
        """Store the steps; nothing is executed until ``fit`` is called.

        Args:
            steps: List of ``(name, function)`` tuples, run in list order.
        """
        self.steps = steps  # Steps will be a list of tuples (name, function)
        self.model = None  # Set by fit() to whatever the "train" step returns
        self.scaler = None  # Placeholder for a scaler; never assigned by this class

    def fit(self, X_train, y_train):
        """Run every step in order on the training data.

        Args:
            X_train: Training features, passed to the first step.
            y_train: Training labels, passed to the first step.

        The result of the ``"train"`` step is stored in ``self.model``; the
        ``(X, y)`` returned by any other step replaces the data seen by the
        steps that follow it.
        """
        for name, step in self.steps:
            print(f"Executing {name}...")
            if name == "train":  # Special case: the training step returns the model
                self.model = step(X_train, y_train)
            else:
                X_train, y_train = step(X_train, y_train)

    def transform(self, X_test):
        """Apply every non-training step to ``X_test`` and return the result.

        Each step is called as ``step(X_test, None)`` and only the first
        element of its return value is kept.

        NOTE: because the step functions are simply called again, a step that
        learns parameters from its input (for example by fitting a scaler)
        will learn them afresh from ``X_test`` rather than reuse what it saw
        during ``fit``.

        Args:
            X_test: Features to push through the non-training steps.

        Returns:
            The features after all non-training steps have been applied.
        """
        for name, step in self.steps:
            if name == "train":
                # Check the skip before logging so the output never claims
                # that the training step was executed on test data.
                continue
            print(f"Executing {name} on test data...")
            X_test, _ = step(X_test, None)
        return X_test


# Step 1: Standardizing the data (Action)
def standardize_data(X, y):
    """Standardize the features with a freshly fitted ``StandardScaler``.

    A new scaler is created and fitted on ``X`` on every call, and it is not
    kept afterwards, so the scaling always reflects only the data passed in.

    NOTE(review): when this function is applied to held-out data, that data
    is therefore scaled with its own statistics rather than the training
    set's. Confirm whether that is intended.

    Args:
        X: Feature data used both to fit the scaler and to be transformed.
        y: Labels; returned unchanged.

    Returns:
        Tuple ``(X_scaled, y)``.
    """
    return StandardScaler().fit_transform(X), y


# Step 2: Training the model (Action)
def train_model(X, y):
    """Fit an ``SVC`` with default settings on ``(X, y)`` and return it.

    Args:
        X: Training features.
        y: Training labels.

    Returns:
        The fitted ``SVC`` instance.
    """
    # fit() hands back the estimator itself, so the fitted model can be
    # returned straight from the call.
    return SVC().fit(X, y)


# Load the Iris dataset and pull out the feature matrix and class labels
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Pipeline steps as (name, function) pairs: standardize first, then train
steps = [("standardize", standardize_data), ("train", train_model)]

# Build the pipeline and fit it on the training split;
# the trained model is kept on the instance as pipeline.model
pipeline = CustomPipeline(steps)
pipeline.fit(X_train, y_train)

# Push the held-out features through the non-training steps (standardization only)
X_test_scaled = pipeline.transform(X_test)

Executing standardize...
Executing train...
Executing standardize on test data...
Executing train on test data...


In [18]:
# Score the trained model on the standardized held-out features
fitted_model = pipeline.model
accuracy = fitted_model.score(X_test_scaled, y_test)
print(f"Model accuracy: {accuracy:.2f}")

Model accuracy: 0.96
