In [2]:
import utils
from sklearn.pipeline import Pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

In [7]:
# Search space for the stacking meta-learner. Every key is routed through the
# 'stacking' pipeline step to its `final_estimator` (a LogisticRegression).
param_grid = {
    # Regularization penalty type.
    'stacking__final_estimator__penalty': ['l1', 'l2'],
    # Inverse of regularization strength (smaller value = stronger penalty).
    'stacking__final_estimator__C': [0.1, 1.0, 10.0],
    # Optimization algorithm; both listed solvers accept 'l1' and 'l2'.
    'stacking__final_estimator__solver': ['liblinear', 'saga'],
}

In [8]:
from tempfile import mkdtemp

# Fresh temporary directory used as the on-disk cache location handed to the
# sklearn Pipeline `memory` argument; the last notebook cell deletes it again.
cachedir = mkdtemp()
cachedir

'/tmp/tmpim7ibtb1'

In [None]:
import joblib

# Base learners for the stack, restored from disk (presumably the best models
# saved by earlier tuning runs — confirm against the notebooks that wrote them).
# NOTE: joblib files are pickle-based; only load artifacts from a trusted source.
dt = joblib.load("results/best_decision_tree/best_dt.joblib")
rf = joblib.load("results/best_random_forest/best_rf.joblib")

# Meta-learner placed on top of the base learners; its penalty, C and solver
# are overridden by the grid search.
lr = LogisticRegression(
    n_jobs=-1,
    random_state=utils.RANDOM_STATE,
)

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Preprocessing sub-pipeline: a single MinMaxScaler step.
# No `memory` here: a Pipeline never caches its last step, so caching on this
# one-step pipeline would have no effect.
preprocess_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

# Base learners of the stack: the random forest and decision tree loaded from
# disk in an earlier cell.
base_estimators = [
    ('rf', rf),
    ('dt', dt)
]

# Stacking ensemble with the logistic regression `lr` as the final estimator.
stacking_clf = StackingClassifier(
    n_jobs=-1,
    estimators=base_estimators,
    final_estimator=lr
)

# Full model. The step name 'stacking' must stay in sync with the
# 'stacking__final_estimator__*' keys of `param_grid`.
# `memory` is set on this outer pipeline so that the fitted 'preprocess' step
# (a non-final transformer here) is actually cached in `cachedir`.
stacking_pipeline = Pipeline(
    [
        ('preprocess', preprocess_pipeline),
        ('stacking', stacking_clf)
    ],
    memory=cachedir
)
stacking_pipeline

In [None]:
# Hyper-parameter search over `param_grid` with cv=5 (presumably a 5-fold
# cross-validated search inside utils.find_best_model — confirm in utils).
# n_jobs=1 here; the StackingClassifier itself is already built with n_jobs=-1.
best_pipe = utils.find_best_model(
    stacking_pipeline,
    param_grid,
    cv=5,
    n_jobs=1,
)

In [None]:
# Persist the selected pipeline (presumably written to
# results/best_stacking/best_stacking.joblib — confirm in utils.save_model).
utils.save_model(
    best_pipe,
    "results/best_stacking",
    "best_stacking.joblib",
)

In [None]:
# Evaluate the selected pipeline; the second argument looks like the output
# directory for the evaluation artifacts — confirm in utils.evaluate_all_model.
utils.evaluate_all_model(
    best_pipe,
    "results/best_stacking",
)

In [None]:
from shutil import rmtree

# Remove the temporary pipeline cache directory created earlier in the notebook.
# Tolerate an already-deleted directory so this cell can be re-run safely;
# any other failure (e.g. permissions) still surfaces.
try:
    rmtree(cachedir)
except FileNotFoundError:
    pass  # cache directory was already cleaned up