In [4]:
import hydra
from omegaconf import DictConfig
from sklearn.pipeline import Pipeline

# Simple pipeline using only built-in steps

In [5]:
from sklearn.feature_selection import VarianceThreshold
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Numeric preprocessing: impute missing values with a constant, drop
# low-variance features, then standardize what is left.
num_pipeline = Pipeline(
    [
        # fill_value must be a number, not the string "-1": SimpleImputer
        # raises a ValueError at fit time when asked to impute numeric data
        # with a non-numeric constant, and the following steps only accept
        # numeric input.
        ("imputer", SimpleImputer(strategy="constant", fill_value=-1)),
        ("VarianceThreshold", VarianceThreshold(threshold=0.1)),
        ("scaler", StandardScaler()),
    ]
)

### Sometimes we want to compare preprocessing pipelines that differ in step order, steps, hyperparameters, etc.

In [7]:
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Logistic-regression variant: impute -> variance filter -> scale -> PCA -> classifier.
pipeline_lr = Pipeline(
    [
        # Numeric fill value: a string constant makes SimpleImputer raise a
        # ValueError on numeric input at fit time.
        ("imputer", SimpleImputer(strategy="constant", fill_value=-1)),
        ("VarianceThreshold", VarianceThreshold(threshold=0.1)),
        # Step names are kept exactly as they were; they are the keys used by
        # `named_steps` and by `<step>__<param>` parameter grids.
        ("scalar1", StandardScaler()),
        ("pca1", PCA(n_components=2)),
        ("lr_classifier", LogisticRegression(random_state=0)),
    ]
)


# Decision-tree variant with a different step order:
# scale -> variance filter -> impute -> PCA -> classifier.
# NOTE(review): after StandardScaler every non-constant feature has unit
# variance, so the 0.1 threshold here effectively only drops constant
# features -- confirm this ordering is intentional for the comparison.
pipeline_dt = Pipeline(
    [
        ("scalar2", StandardScaler()),
        ("VarianceThreshold", VarianceThreshold(threshold=0.1)),
        # Numeric fill value: a string constant makes SimpleImputer raise a
        # ValueError on numeric input at fit time.
        ("imputer", SimpleImputer(strategy="constant", fill_value=-1)),
        ("pca2", PCA(n_components=2)),
        # Seeded so repeated runs of the notebook give the same tree.
        ("dt_classifier", DecisionTreeClassifier(random_state=0)),
    ]
)

# Random-forest variant without imputation or variance filtering:
# scale -> PCA -> classifier.
pipeline_randomforest = Pipeline(
    [
        ("scalar3", StandardScaler()),
        ("pca3", PCA(n_components=2)),
        # Seeded so repeated runs of the notebook give the same forest.
        ("rf_classifier", RandomForestClassifier(random_state=0)),
    ]
)

## Or maybe add your custom steps

In [4]:
def make_hydra_pipeline(steps_config: DictConfig):
    """Build a scikit-learn Pipeline from an ordered sequence of step configs.

    ``steps_config`` is iterated in order. Each element must be a mapping
    whose first entry is ``step_name -> step_params``; ``step_params`` is
    handed to ``hydra.utils.instantiate`` to create the feature
    selector/transformer/estimator for that step. Only the first entry of
    each mapping is used; any further entries are ignored.

    Args:
        steps_config (DictConfig): the config containing the instructions for
            creating the pipeline steps, one single-entry mapping per step.

    Returns:
        sklearn.pipeline.Pipeline: a pipeline holding the instantiated steps
            in the same order as they appear in ``steps_config``.

    Raises:
        ValueError: if one of the step configs contains no entries.
    """
    steps = []

    for step_config in steps_config:
        # `items()` yields a view object on plain dicts and on recent
        # OmegaConf versions; views cannot be indexed with `[0]`, so pull the
        # first (name, params) pair through an iterator instead.
        first_entry = next(iter(step_config.items()), None)
        if first_entry is None:
            raise ValueError(
                "each pipeline step config must contain one "
                "'step_name: step_params' entry, got an empty mapping"
            )
        step_name, step_params = first_entry

        # Instantiate the object described by the config and register it
        # under its step name.
        steps.append((step_name, hydra.utils.instantiate(step_params)))

    return Pipeline(steps)