# Preparation

<b>Load Libraries</b>

In [6]:
# user-defined functions and classes (project-local `src` package)
import os
import sys

# Make the project-local `src` package importable by putting the parent of the
# notebook's working directory on sys.path. The parent is derived from the
# path string instead of chdir-ing up and back, so the process working
# directory is never modified (not even transiently if the cell is interrupted).
cwd = os.getcwd()
path_to_src = os.path.dirname(cwd)
if path_to_src not in sys.path:
    sys.path.append(path_to_src)
from src.notebook.support import *
from src.mlflow.support import *

# others
from warnings import simplefilter

<b>Load Dataset</b>

In [8]:
# Build the training materials from the cleaned dataset. Later cells read the
# 'X_train', 'y_train', 'X_test' and 'y_test' entries of the result.
cleaned_csv_path = '../cache/data/cleaned.csv'
materials = prepare_data_to_train(path=cleaned_csv_path)

<b>MLflow</b>

In [9]:
# Point MLflow at the local tracking server.
mlflow.set_tracking_uri('http://127.0.0.1:5000/')
experiment_name = 'Feature engineering'

# Create the experiment only when it does not exist yet, so the custom artifact
# location is applied on first use. An explicit existence check replaces the
# former bare `except:`, which also hid unrelated failures (e.g. an
# unreachable tracking server).
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(name=experiment_name, artifact_location='../cache/mlflow/runs/')

# Always activate the experiment: create_experiment() only registers it, and
# the search_runs() calls below default to the *active* experiment.
mlflow.set_experiment(experiment_name=experiment_name)

# 1. Stacking: LR and XGB as base models

In [None]:
# Pick the logged LR run with the highest validation F-beta for this
# preprocessing pipeline. No experiment_ids are passed, so the search runs
# against the currently active MLflow experiment.
lr_runs = mlflow.search_runs(
    filter_string="tags.base_model = 'LR' and tags.pre_pro = 'si(mean)_std_ohe_smoteenn'",
    order_by=["metrics.val_fbeta DESC"]
)
if lr_runs.empty:
    # Same exception type as the unguarded `.values[0]`, but with a message
    # that says what is actually missing.
    raise IndexError(
        "No MLflow run found with tags base_model='LR' and "
        "pre_pro='si(mean)_std_ohe_smoteenn' in the active experiment"
    )
run_id = lr_runs['run_id'].values[0]

# Download the run's model artifacts and locate the two pickles.
path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/artifacts")
path_lr = {
    'feature_selector': f"{path}/feature_selector.pkl",
    'model': f"{path}/model.pkl",
}
# NOTE(review): FSBaseClassifier is project-defined; presumably it combines the
# stored feature selector and model into one estimator — confirm in src.
lr_as_base = FSBaseClassifier(path_lr)

In [None]:
# Pick the logged XGB run with the highest validation F-beta for this
# preprocessing pipeline. No experiment_ids are passed, so the search runs
# against the currently active MLflow experiment.
xgb_runs = mlflow.search_runs(
    filter_string="tags.base_model = 'XGB' and tags.pre_pro = 'si(mean)_std_ohe_smoteenn'",
    order_by=["metrics.val_fbeta DESC"]
)
if xgb_runs.empty:
    # Same exception type as the unguarded `.values[0]`, but with a message
    # that says what is actually missing.
    raise IndexError(
        "No MLflow run found with tags base_model='XGB' and "
        "pre_pro='si(mean)_std_ohe_smoteenn' in the active experiment"
    )
run_id = xgb_runs['run_id'].values[0]

# Download the run's model artifacts and locate the two pickles.
path = mlflow.artifacts.download_artifacts(f"runs:/{run_id}/model/artifacts")
path_xgb = {
    'feature_selector': f"{path}/feature_selector.pkl",
    'model': f"{path}/model.pkl",
}
# NOTE(review): FSBaseClassifier is project-defined; presumably it combines the
# stored feature selector and model into one estimator — confirm in src.
xgb_as_base = FSBaseClassifier(path_xgb)

In [None]:
# Stack the LR and XGB base classifiers under a logistic-regression
# final estimator; stack_method='predict' selects the base estimators'
# predict() as the method whose output is stacked.
stack_members = [('LR', lr_as_base), ('XGB', xgb_as_base)]
stacking = StackingClassifier(
    estimators=stack_members,
    final_estimator=LogisticRegression(n_jobs=-1),
    stack_method='predict',
)
stacking.fit(materials['X_train'], materials['y_train'])

# Score with F-beta (beta=2) and print it above the full classification report.
# NOTE(review): despite the `val_` prefix, these numbers come from the
# X_test / y_test entries of `materials`.
val_predictions = stacking.predict(materials['X_test'])
y_holdout = materials['y_test']
val_fbeta = fbeta_score(y_holdout, val_predictions, beta=2)
holdout_report = classification_report(y_holdout, val_predictions)
print(f"FBeta score: {val_fbeta} \n{holdout_report}")