# Pipeline Example

In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [3]:
# Build a reproducible toy regression problem: 1000 samples with 10 features
# and a small amount of Gaussian noise added to the target.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,  # fixed seed so every run generates the same data
)

## Train-test split

In [6]:
# Hold out 20% of the samples for evaluation and train on the rest.
# The fixed seed keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Pipeline Design
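A scikit-learn `Pipeline` is similar to a Workflow in Petrel: it chains processing steps so that they run in sequence as a single unit. The steps below are self-explanatory.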

In [7]:
# Chain preprocessing and the estimator into one object so the scaler and the
# regressor are always fitted and applied together, in this order.
pipeline = Pipeline(
    steps=[
        ("scaler", StandardScaler()),  # standardize the features first
        ("regressor", LinearRegression()),  # then fit a linear model
    ]
)

# fit() returns the fitted pipeline itself, so training on the training set
# and predicting on the held-out set can be chained in one expression.
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Score the held-out predictions with the Mean Squared Error (MSE)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)

Mean Squared Error: 0.009511914910420161
