# Supervised Learning Pipeline with sklearn

In [None]:
import numpy as np
import pandas as pd

from sklearn import set_config
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

### Create model switcher class for pipeline grid search

In [None]:
from sklearn.base import BaseEstimator

class ModelSwitcher(BaseEstimator):
    """Wrapper estimator that delegates every call to ``self.estimator``.

    Because the wrapped classifier is an ordinary constructor parameter, it
    can be replaced (and its own hyper-parameters addressed with the
    ``estimator__<name>`` syntax) from a parameter grid.
    """

    def __init__(
        self,
        estimator = SGDClassifier(),
    ):
        """
        A custom BaseEstimator that can switch between classifiers.
        :param estimator: sklearn object - the classifier
        """
        # NOTE: the default instance is created once, when the class body is
        # evaluated, so every ModelSwitcher built without an argument refers
        # to the same SGDClassifier object.
        self.estimator = estimator

    def fit(self, X, y=None, **kwargs):
        """
        Fit the wrapped classifier in place.
        :param X: training features, handed to the wrapped estimator as-is
        :param y: training targets, handed to the wrapped estimator as-is
        :param kwargs: extra fit parameters, forwarded to the wrapped
            estimator's ``fit``
        :return: self
        """
        # Forward fit parameters instead of silently discarding them.
        self.estimator.fit(X, y, **kwargs)
        return self

    def predict(self, X, y=None):
        """
        Return the wrapped classifier's predictions for X.
        :param X: samples to predict
        :param y: unused; accepted only for call compatibility
        """
        return self.estimator.predict(X)

    def predict_proba(self, X):
        """
        Return the result of the wrapped classifier's ``predict_proba``.
        :param X: samples to predict
        """
        return self.estimator.predict_proba(X)

    def score(self, X, y):
        """
        Return the wrapped classifier's own ``score`` on (X, y).
        :param X: evaluation features
        :param y: evaluation targets
        """
        return self.estimator.score(X, y)

### Make preprocessor with ColumnTransformer

In [None]:
def make_preprocessor(numeric_features, categorical_features):
    """
    Build an unfitted ColumnTransformer with a 'numeric' and a 'categorical'
    branch.

    Relies on ``Pipeline`` (``sklearn.pipeline``) being imported at the top
    of the file.

    :param numeric_features: column selection for the 'numeric' branch; when
        falsy (e.g. None or an empty list) the branch is 'passthrough'
    :param categorical_features: column selection for the 'categorical'
        branch; when falsy the branch is 'passthrough'
    :return: ColumnTransformer with the two named branches
    """
    # Numeric branch: median imputation followed by standard scaling.
    if numeric_features:
        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])
    else:
        numeric_transformer = 'passthrough'

    # Categorical branch: fill gaps with the literal 'missing', then one-hot
    # encode; categories unseen during fit are ignored at transform time.
    if categorical_features:
        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ])
    else:
        categorical_transformer = 'passthrough'

    # The branch names ('numeric', 'categorical') and step names ('imputer',
    # 'scaler', 'onehot') are part of the nested parameter paths used when
    # tuning, so they must stay stable.
    return ColumnTransformer(
        transformers=[
            ('numeric', numeric_transformer, numeric_features),
            ('categorical', categorical_transformer, categorical_features),
        ])

### Make Grid Search on pipeline

In [None]:
def make_grid_search(pipe, n_jobs, param_grid):
    """
    Wrap a pipeline in an unfitted GridSearchCV.

    :param pipe: estimator or pipeline to tune
    :param n_jobs: passed through to GridSearchCV's ``n_jobs``
    :param param_grid: dict, or list of dicts, of hyper-parameter candidates
    :return: GridSearchCV configured with the given arguments
    :raises ValueError: if ``param_grid`` is None or empty
    """
    if not param_grid:
        raise ValueError('Grid Search Hyperparameters is None!')

    # Honour the caller's n_jobs; it used to be overridden with a fixed 1.
    return GridSearchCV(pipe, n_jobs=n_jobs, param_grid=param_grid)

### Manually set hyperparameters for pipeline grid search
(**Your program starts from here**)

In [None]:
# Preprocessing candidates, merged into every model-specific grid below.
param_preprocessor = {
    'preprocessor__numeric__imputer__strategy': ['mean', 'median'],
}

# One dict per candidate classifier. The 'model_switcher__estimator' entry
# selects the classifier; the 'model_switcher__estimator__*' entries tune
# that classifier's own hyper-parameters.
param_model = [
    {
        'model_switcher__estimator': [SGDClassifier()],
        'model_switcher__estimator__penalty': ['l1', 'l2', 'elasticnet'],
        'model_switcher__estimator__max_iter': [150],
        'model_switcher__estimator__tol': [1e-4],
        'model_switcher__estimator__loss': ['hinge'],
    },
    {
        'model_switcher__estimator': [LogisticRegression()],
        'model_switcher__estimator__C': [5, 10, 20],
    },
]

# Final grid: each model grid combined with the shared preprocessing grid.
# On a key clash the preprocessing entry wins because it is unpacked last.
param_grid = [{**param, **param_preprocessor} for param in param_model]