In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
import mlflow
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from typing import List, Tuple, Any, Dict
from enum import Enum
from sklearn.base import BaseEstimator, TransformerMixin
import matplotlib.pyplot as plt
import seaborn as sns
import os
from sklearn.model_selection import (
    StratifiedKFold,
)

from mlflow.models.signature import infer_signature
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    balanced_accuracy_score,
)
from sklearn.preprocessing import PolynomialFeatures

from sklearn.metrics import classification_report
from functools import partial
import optuna
import lightgbm as lgb

# Address of the MLflow tracking server; the tracking URI below is built from these two values.
HOST = "127.0.0.1"
PORT = "8080"
# Point the mlflow client used in this notebook at that server.
mlflow.set_tracking_uri(uri=f"http://{HOST}:{PORT}")


## Import données

In [14]:
# Load the training table; the path is relative to the notebook's working directory.
data = pd.read_csv("./data/application_train.csv")

# Quick sanity check: (rows, columns).
print(data.shape)

# Preview the first rows.
data.head()


(307511, 122)


Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,...,0,0,0,0,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
def _check_most_frequent_values(
    df: pd.DataFrame, columns: list, col_type: str
) -> None:
    """Warn when the most frequent value of a column is not unique.

    For every column in ``columns`` the value counts are computed (missing
    values are excluded by ``value_counts``). If two or more values share the
    highest count, a warning and the tied values are printed.

    Args:
        df: DataFrame holding the columns to inspect.
        columns: Names of the columns to check.
        col_type: Label inserted in the warning message (e.g. "binary").

    Returns:
        None. The function only prints.
    """
    for col in columns:
        value_counts = df[col].value_counts()
        if value_counts.empty:
            # Column has no non-missing value: there is no mode to be
            # ambiguous about, and `.iloc[0]` below would raise IndexError.
            continue

        # value_counts is sorted by count, so position 0 holds the top count.
        is_top = (value_counts == value_counts.iloc[0]).to_numpy()
        if is_top.sum() > 1:
            print(
                f"Warning: Multiple most frequent values in {col_type} column {col}"
            )
            print(f"Values: {list(value_counts.index[is_top])}")


def get_column_types(
    df: pd.DataFrame,
) -> Tuple[List[str], List[str], List[str]]:
    """
    Split the columns of ``df`` into numerical, binary and multiclass groups.

    Numerical columns are those with dtype int64 or float64. Columns with
    dtype object or category are treated as categorical and split by their
    number of distinct non-missing values: at most two -> binary, more than
    two -> multiclass. Columns of any other dtype are not returned.

    Args:
        df: Input DataFrame

    Returns:
        Tuple (numerical, binary, multiclass) of column-name lists, each in
        the column order of ``df``. The three counts are also printed.
    """
    numerical_columns = df.select_dtypes(
        include=["int64", "float64"]
    ).columns.tolist()

    binary_columns: List[str] = []
    multiclass_columns: List[str] = []
    categorical_frame = df.select_dtypes(include=["object", "category"])
    for name in categorical_frame.columns.tolist():
        # One nunique() call per column decides which bucket it lands in.
        if df[name].nunique() <= 2:
            binary_columns.append(name)
        else:
            multiclass_columns.append(name)

    print(f"Found {len(numerical_columns)} numerical features")
    print(f"Found {len(binary_columns)} binary features")
    print(f"Found {len(multiclass_columns)} categorical features")

    return numerical_columns, binary_columns, multiclass_columns


class OutlierStrategy(str, Enum):
    """Ways OutlierHandler treats values outside its fitted quantile bounds.

    The class also derives from ``str``, so each member compares equal to its
    plain string value (e.g. ``"keep"``).
    """

    # Return the data unchanged.
    KEEP = "keep"
    # Overwrite out-of-bounds values with NaN.
    REPLACE_WITH_NAN = "replace_with_nan"
    # Remove every row that has an out-of-bounds value in a handled column.
    DROP = "drop"


class OutlierHandler(BaseEstimator, TransformerMixin):
    """Quantile-based outlier handling for selected columns.

    ``fit`` stores, per column, the ``threshold`` and ``1 - threshold``
    quantiles. ``transform`` treats values strictly outside those bounds as
    outliers and applies ``strategy``:

    * KEEP: return a copy of the input unchanged.
    * REPLACE_WITH_NAN: replace outliers with NaN.
    * DROP: remove rows containing at least one outlier and record their
      index labels in ``to_drop_idx_``.

    Note that DROP changes the number of rows, so the caller must realign the
    target itself.

    Args:
        strategy: An ``OutlierStrategy`` member or its string value.
        threshold: Lower quantile; the upper quantile is ``1 - threshold``.
        columns: Columns to process. ``None`` or an empty sequence means
            "all numeric columns of the frame passed in".
    """

    def __init__(
        self,
        strategy: OutlierStrategy = OutlierStrategy.KEEP,
        threshold: float = 0.1,
        columns: List[str] = None,
    ):
        self.strategy = strategy
        self.threshold = threshold
        self.columns = columns
        # Initialised here (not only in fit) because existing callers read
        # these attributes right after construction / fit_transform.
        self.lower_bounds_ = {}
        self.upper_bounds_ = {}
        self.to_drop_idx_ = None

    def _resolve_columns(self, X: pd.DataFrame):
        """Return the configured columns, or all numeric columns if none were given."""
        # `self.columns or ...` raises for pandas Index / numpy arrays
        # ("truth value is ambiguous"); test emptiness explicitly instead.
        if self.columns is None or len(self.columns) == 0:
            return X.select_dtypes(include=["number"]).columns
        return self.columns

    def fit(self, X, y=None):
        """Compute the lower/upper quantile bound of every handled column."""
        X = pd.DataFrame(X)

        lower_bounds, upper_bounds = {}, {}
        for column in self._resolve_columns(X):
            quantiles = X[column].quantile([self.threshold, 1 - self.threshold])
            lower_bounds[column] = quantiles.iloc[0]
            upper_bounds[column] = quantiles.iloc[1]

        # Rebind instead of updating in place so a refit on different columns
        # never keeps bounds from a previous fit.
        self.lower_bounds_ = lower_bounds
        self.upper_bounds_ = upper_bounds
        self.to_drop_idx_ = None
        return self

    def transform(self, X):
        """Apply the configured strategy and return a new DataFrame."""
        X = pd.DataFrame(X).copy()

        if self.strategy == OutlierStrategy.KEEP:
            return X

        keep_rows = np.ones(len(X), dtype=bool)

        for column in self._resolve_columns(X):
            outlier_mask = (X[column] < self.lower_bounds_[column]) | (
                X[column] > self.upper_bounds_[column]
            )
            if self.strategy == OutlierStrategy.REPLACE_WITH_NAN:
                # Series.mask upcasts integer columns to float when needed;
                # assigning NaN through .loc into an int column is a
                # deprecated incompatible-dtype assignment in recent pandas.
                X[column] = X[column].mask(outlier_mask)
            elif self.strategy == OutlierStrategy.DROP:
                keep_rows &= ~outlier_mask.to_numpy()

        if self.strategy == OutlierStrategy.DROP:
            self.to_drop_idx_ = X.index[~keep_rows]
            X = X[keep_rows]

        return X


class DropMissingColumns(BaseEstimator, TransformerMixin):
    """Transformer removing columns whose share of missing values reaches a threshold."""

    def __init__(self, threshold=0.4):
        """
        Drops columns with missing value rate >= threshold.
        """
        self.threshold = threshold
        self.cols_to_drop_ = []

    def fit(self, X, y=None):
        """Record (and print) the columns whose missing ratio is >= ``threshold``."""
        frame = pd.DataFrame(X)
        null_share = frame.isnull().mean()
        too_sparse = (null_share >= self.threshold).to_numpy()
        self.cols_to_drop_ = null_share.index[too_sparse].tolist()

        if not self.cols_to_drop_:
            return self

        print(
            f"🧹 Dropping {len(self.cols_to_drop_)} column(s) with missing ratio ≥ {self.threshold}:"
        )
        for dropped in self.cols_to_drop_:
            print(f" - {dropped}")
        return self

    def transform(self, X):
        """Return ``X`` as a DataFrame without the recorded columns (absent ones are ignored)."""
        frame = pd.DataFrame(X)
        return frame.drop(columns=self.cols_to_drop_, errors="ignore")

    @property
    def removed_columns_(self):
        """Alias for ``cols_to_drop_``."""
        return self.cols_to_drop_


def _make_categorical_imputer(categorical_strategy: str):
    """Build the SimpleImputer shared by the binary and multiclass branches."""
    return SimpleImputer(
        strategy=categorical_strategy,
        # "missing" is only meaningful for the constant strategy.
        fill_value="missing" if categorical_strategy == "constant" else None,
    )


def create_preprocessing_pipeline(
    df: pd.DataFrame,
    numerical_cols: List[str],
    binary_cols: List[str],
    multiclass_cols: List[str],
    categorical_strategy: str,
    encode_categoricals: bool = True,
    polynomial_features: bool = False,
    polynomial_degree: int = 3,
) -> ColumnTransformer:
    """
    Creates a preprocessing pipeline for numerical, binary, and multiclass categorical features.

    Numerical columns are median-imputed, optionally expanded with polynomial
    features, then min-max scaled. Binary columns are imputed and ordinal
    encoded. Multiclass columns are imputed and one-hot encoded (first level
    dropped). Only columns listed in a branch are passed to the
    ColumnTransformer; with ``encode_categoricals=False`` no categorical
    branch is created.

    Args:
        df (pd.DataFrame): Frame used only to warn about tied modes when
            ``categorical_strategy == "most_frequent"``.
        numerical_cols (List[str]): List of numerical feature column names.
        binary_cols (List[str]): List of binary categorical feature column names.
        multiclass_cols (List[str]): List of multi-category feature column names.
        categorical_strategy (str): Strategy for imputing categorical values ("most_frequent", "constant", etc.).
        encode_categoricals (bool): Whether to encode categorical features.
        polynomial_features (bool): Whether to add polynomial terms of the
            numerical columns (after imputation, before scaling).
        polynomial_degree (int): Degree of the polynomial expansion; only
            used when ``polynomial_features`` is True.

    Returns:
        ColumnTransformer: An unfitted scikit-learn column transformer.
    """

    transformers = []

    # --- Numerical columns ---
    if numerical_cols:
        steps = [("imputer", SimpleImputer(strategy="median"))]
        if polynomial_features:
            steps.append(
                (
                    "poly",
                    PolynomialFeatures(
                        degree=polynomial_degree,
                        include_bias=False,
                        interaction_only=False,
                    ),
                )
            )
        steps.append(("scaler", MinMaxScaler()))
        transformers.append(("numerical", Pipeline(steps), numerical_cols))

    # --- Categorical encoding ---
    if encode_categoricals:
        # --- Binary columns ---
        if binary_cols:
            if categorical_strategy == "most_frequent":
                _check_most_frequent_values(df, binary_cols, "binary")

            binary_pipeline = Pipeline(
                [
                    ("imputer", _make_categorical_imputer(categorical_strategy)),
                    (
                        "encoder",
                        # A category absent from the fitting data (possible in
                        # a CV fold or the test split) would make the default
                        # OrdinalEncoder raise; encode it as -1 instead, in
                        # line with the one-hot branch ignoring unknowns.
                        OrdinalEncoder(
                            handle_unknown="use_encoded_value",
                            unknown_value=-1,
                        ),
                    ),
                ]
            )
            transformers.append(("binary", binary_pipeline, binary_cols))

        # --- Multiclass columns ---
        if multiclass_cols:
            if categorical_strategy == "most_frequent":
                _check_most_frequent_values(df, multiclass_cols, "multiclass")

            multiclass_pipeline = Pipeline(
                [
                    ("imputer", _make_categorical_imputer(categorical_strategy)),
                    (
                        "onehot",
                        OneHotEncoder(
                            drop="first",
                            sparse_output=False,
                            handle_unknown="ignore",
                        ),
                    ),
                ]
            )
            transformers.append(
                ("multiclass", multiclass_pipeline, multiclass_cols)
            )

    return ColumnTransformer(transformers)


def prepare_data(
    df: pd.DataFrame,
    target_column: str,
    categorical_strategy: str,
    outlier_strategy: str,
    outlier_threshold: float = 0.1,
    test_size: float = 0.2,
    random_state: int = 42,
    encode_categoricals: bool = True,
    drop_missing_columns: bool = True,
    missing_col_threshold: float = 0.4,
    polynomial_features: bool = False,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.Series, pd.Series, "ColumnTransformer"]:
    """Split the data and build an unfitted preprocessor for it.

    Steps, in order:
      1. Separate features and target, then make a stratified train/test split.
      2. Optionally drop columns whose missing ratio (measured on the training
         split) is >= ``missing_col_threshold``, from both splits.
      3. Detect numerical / binary / multiclass columns on the training split.
      4. Unless the outlier strategy is KEEP, fit an OutlierHandler on the
         numerical training columns and apply it to the training split only.
      5. Build the preprocessing ColumnTransformer (not fitted here).

    Args:
        df: Full dataset including the target column.
        target_column: Name of the target column.
        categorical_strategy: Imputation strategy for categorical columns.
        outlier_strategy: An OutlierStrategy member or its string value.
        outlier_threshold: Lower quantile used by the outlier handler.
        test_size: Fraction of rows assigned to the test split.
        random_state: Seed for the split.
        encode_categoricals: Forwarded to create_preprocessing_pipeline.
        drop_missing_columns: Whether to run step 2.
        missing_col_threshold: Missing-ratio threshold for step 2.
        polynomial_features: Forwarded to create_preprocessing_pipeline.

    Returns:
        (X_train, X_test, y_train, y_test, preprocessor). The frames are NOT
        transformed by the preprocessor; it is returned unfitted.
    """

    X = df.drop(columns=[target_column])
    y = df[target_column]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Manage missing values
    if drop_missing_columns:
        dropper = DropMissingColumns(threshold=missing_col_threshold)
        X_train = dropper.fit_transform(X_train)
        X_test = dropper.transform(X_test)

    numerical_cols, binary_cols, multiclass_cols = get_column_types(X_train)

    if outlier_strategy != OutlierStrategy.KEEP:
        outlier_handler = OutlierHandler(
            strategy=outlier_strategy,
            threshold=outlier_threshold,
            columns=numerical_cols,
        )
        # NOTE(review): the handler is applied to the training split only;
        # X_test keeps its raw values. Confirm this is the intended
        # evaluation setup.
        X_train = outlier_handler.fit_transform(X_train)

        if (
            outlier_strategy == OutlierStrategy.DROP
            and outlier_handler.to_drop_idx_ is not None
        ):
            # Rows were removed from X_train: keep the matching targets only.
            y_train = y_train.loc[X_train.index]

    preprocessor = create_preprocessing_pipeline(
        X_train,
        numerical_cols,
        binary_cols,
        multiclass_cols,
        categorical_strategy=categorical_strategy,
        encode_categoricals=encode_categoricals,
        polynomial_features=polynomial_features,
    )

    return X_train, X_test, y_train, y_test, preprocessor


# Test pipeline

In [16]:
# Smoke-test prepare_data over every (categorical strategy, outlier strategy)
# combination and summarise which ones succeeded.
categorical_strategies = ["constant", "most_frequent"]
outlier_strategies = [
    OutlierStrategy.KEEP,
    OutlierStrategy.REPLACE_WITH_NAN,
    OutlierStrategy.DROP,
]

results = {}

for cat_strategy in categorical_strategies:
    for out_strategy in outlier_strategies:
        print("\nTesting combination:")
        print(f"Categorical strategy: {cat_strategy}")
        print(f"Outlier strategy: {out_strategy}")

        try:
            X_train_proc, X_test_proc, y_train_proc, y_test_proc, preproc = (
                prepare_data(
                    data,
                    target_column="TARGET",
                    categorical_strategy=cat_strategy,
                    outlier_strategy=out_strategy,
                    outlier_threshold=0.05,
                    test_size=0.2,
                    random_state=42,
                )
            )
        except Exception as exc:
            # Record the failure so the summary below can report it and the
            # remaining combinations still run.
            results[(cat_strategy, out_strategy)] = {
                "success": False,
                "error": f"{type(exc).__name__}: {exc}",
            }
            print(f"✗ Failed: {exc}")
            continue

        results[(cat_strategy, out_strategy)] = {
            "X_train_shape": X_train_proc.shape,
            "X_test_shape": X_test_proc.shape,
            "success": True,
        }

        print("Shapes après transformation:")
        print(f"X_train: {X_train_proc.shape}")
        print(f"X_test: {X_test_proc.shape}")

print("\nRésumé des tests:")
print("-" * 50)
for (cat_strategy, out_strategy), result in results.items():
    print(f"\nCategorical: {cat_strategy}")
    print(f"Outlier: {out_strategy}")
    if result["success"]:
        print("✓ Success")
        print(f"  Train shape: {result['X_train_shape']}")
        print(f"  Test shape: {result['X_test_shape']}")
    else:
        print("✗ Failed")
        print(f"  Error: {result['error']}")



Testing combination:
Categorical strategy: constant
Outlier strategy: OutlierStrategy.KEEP
🧹 Dropping 49 column(s) with missing ratio ≥ 0.4:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BEGINEXPLUATATION_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMAX_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BEGINEXPLUATATION_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMAX_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BEGINEXPLUATATION_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMAX_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIV

In [17]:
# Run prepare_data on a four-feature subset with polynomial features enabled.
# outlier_strategy is given as the plain string "keep"; OutlierStrategy derives
# from str, so prepare_data's comparison with OutlierStrategy.KEEP treats it as KEEP.
# NOTE(review): prepare_data returns the split frames without applying the
# preprocessor, so the `_proc` variables hold untransformed data despite their names.
X_train_proc, X_test_proc, y_train_proc, y_test_proc, preproc = prepare_data(
    data[
        ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH", "TARGET"]
    ],
    target_column="TARGET",
    categorical_strategy="most_frequent",
    outlier_strategy="keep",
    outlier_threshold=0.05,
    test_size=0.2,
    random_state=42,
    encode_categoricals=False,
    drop_missing_columns=True,
    missing_col_threshold=0.6,
    polynomial_features=True,
)
# Display the (untransformed) training features.
X_train_proc


Found 4 numerical features
Found 0 binary features
Found 0 categorical features


Unnamed: 0,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,DAYS_BIRTH
181648,0.384582,0.289573,0.622922,-12298
229245,,0.514261,,-15375
122525,,0.486906,0.598926,-19307
306311,,0.675705,0.454321,-17791
300658,,0.154565,,-8486
...,...,...,...,...
31304,,0.549668,,-15374
121193,,0.569702,0.600658,-19035
248504,,0.461966,0.683269,-23088
175469,,0.459173,0.812823,-22148


# MLflow

In [18]:
def plot_confusion_matrix_heatmap(cm, labels, title="Confusion Matrix"):
    """Draw ``cm`` as an annotated heatmap, save it as a PNG and return the file path.

    Args:
        cm: Confusion matrix with integer counts (cells are formatted with "d").
        labels: Tick labels used on both axes.
        title: Figure title.

    Returns:
        Path of the saved image, relative to the current working directory.
    """
    img_path = "confusion_matrix_heatmap.png"

    fig, ax = plt.subplots(figsize=(6, 4))
    heatmap_options = dict(
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
    )
    sns.heatmap(cm, ax=ax, **heatmap_options)
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_title(title)

    fig.tight_layout()
    fig.savefig(img_path)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    return img_path


def evaluate_model(
    y_true, y_pred, y_proba, recall_weight=0.7, precision_weight=0.3
):
    """Compute classification metrics and the confusion matrix.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels (already thresholded).
        y_proba: Scores for the positive class, used for ROC AUC only.
        recall_weight: Weight of recall in ``combined_score``.
        precision_weight: Weight of precision in ``combined_score``.
            NOTE(review): the defaults (0.7 / 0.3) differ from the 0.8 / 0.2
            weighting that run_experiment uses for its tuning objective.

    Returns:
        (metrics, cm): ``metrics`` maps metric names to values (accuracy,
        balanced_accuracy, precision, recall, f1, roc_auc, combined_score);
        ``cm`` is the confusion matrix of ``y_true`` vs ``y_pred``.
    """
    cm = confusion_matrix(y_true, y_pred)

    # Computed once and reused for the combined score.
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)

    metrics = {
        "accuracy": accuracy_score(y_true, y_pred),
        "balanced_accuracy": balanced_accuracy_score(y_true, y_pred),
        "precision": precision,
        "recall": recall,
        "f1": f1_score(y_true, y_pred),
        "roc_auc": roc_auc_score(y_true, y_proba),
        "combined_score": recall_weight * recall + precision_weight * precision,
    }
    return metrics, cm


def log_mlflow_models(
    df: pd.DataFrame, pipeline: Any, artifact_path: str
) -> None:
    """
    Logs a trained scikit-learn pipeline to MLflow, including input signature and example.

    Args:
        df (pd.DataFrame): Input features used to infer the model signature and create an input example.
        pipeline (Any): Trained scikit-learn pipeline to log.
        artifact_path (str): Path in the MLflow run where the model will be saved (e.g., "model", "model_final").

    Returns:
        None
    """
    # The prediction is only needed to infer the output schema, so a small
    # slice is enough; the input schema is still inferred from the full frame.
    prediction_sample = pipeline.predict(df.iloc[:100])
    signature = infer_signature(df, prediction_sample)

    # Prefer rows without missing values for the example, but fall back to the
    # first rows when every row contains a NaN (dropna() would leave nothing).
    complete_rows = df.dropna()
    example_source = df if complete_rows.empty else complete_rows
    input_example = example_source.iloc[:5]

    mlflow.sklearn.log_model(
        pipeline,
        artifact_path=artifact_path,
        signature=signature,
        input_example=input_example,
    )


In [33]:
# Weights of the tuning objective. The hold-out "combined_score" is computed
# with the same weights so the two metrics logged under that name agree.
_RECALL_WEIGHT = 0.8
_PRECISION_WEIGHT = 0.2

_CV_METRIC_NAMES = (
    "recall",
    "precision",
    "accuracy",
    "f1",
    "balanced_accuracy",
    "roc_auc",
)


def _combined_score(recall, precision):
    """Weighted recall/precision blend used as the Optuna objective."""
    return _RECALL_WEIGHT * recall + _PRECISION_WEIGHT * precision


def _build_pipeline(model_class, preprocessor, params):
    """Assemble preprocessing + classifier and apply ``params`` through set_params."""
    pipeline = Pipeline(
        [
            ("preprocessing", preprocessor),
            ("classifier", model_class(random_state=42, n_jobs=1)),
        ]
    )
    pipeline.set_params(**params)
    return pipeline


def _loggable_params(params):
    """Map pipeline parameters to the flat names logged to MLflow."""
    loggable = {}
    for param_name, param_value in params.items():
        if param_name == "classifier__class_weight" and isinstance(
            param_value, dict
        ):
            # Only the positive-class weight varies; log it as a scalar.
            loggable["pos_weight"] = param_value.get(1)
        else:
            loggable[param_name.replace("classifier__", "")] = param_value
    return loggable


def _cross_validate(pipeline, X_train, y_train, threshold, cv):
    """Return the mean of each CV metric for ``pipeline`` at ``threshold``.

    The pipeline is refit on every fold, so on return it is fitted on the
    training part of the last fold.
    """
    splitter = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    fold_scores = {name: [] for name in _CV_METRIC_NAMES}

    for train_idx, val_idx in splitter.split(X_train, y_train):
        X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        pipeline.fit(X_tr, y_tr)
        y_proba = pipeline.predict_proba(X_val)[:, 1]
        y_pred = (y_proba > threshold).astype(int)

        fold_scores["recall"].append(recall_score(y_val, y_pred))
        fold_scores["precision"].append(precision_score(y_val, y_pred))
        fold_scores["accuracy"].append(accuracy_score(y_val, y_pred))
        fold_scores["f1"].append(f1_score(y_val, y_pred))
        fold_scores["balanced_accuracy"].append(
            balanced_accuracy_score(y_val, y_pred)
        )
        fold_scores["roc_auc"].append(roc_auc_score(y_val, y_proba))

    return {name: np.mean(scores) for name, scores in fold_scores.items()}


def run_experiment(
    model_name,
    model_class,
    param_space_fn,
    dataset_name,
    X_train,
    X_test,
    y_train,
    y_test,
    preprocessor,
    cv=5,
    n_trials=30,
):
    """Tune a classifier with Optuna, then evaluate the best configuration, all tracked in MLflow.

    A parent MLflow run is opened in the experiment named ``model_name``.
    Each Optuna trial is a nested run that samples hyperparameters and a
    decision threshold, cross-validates the pipeline on the training data and
    logs the mean metrics plus the pipeline (as fitted on the last fold). The
    objective is 0.8 * recall + 0.2 * precision (maximised).

    Afterwards the best trial's full parameter set is rebuilt, the pipeline is
    refit on all of ``X_train`` and evaluated on ``X_test`` at the best
    threshold; metrics, a confusion-matrix image, a classification report and
    the final model are logged to the parent run.

    Args:
        model_name: Experiment name and run-name prefix.
        model_class: Classifier class; instantiated as
            ``model_class(random_state=42, n_jobs=1)``.
        param_space_fn: Callable taking a trial and returning a dict of
            pipeline parameters (keys prefixed with ``classifier__``).
        dataset_name: Label logged as tag/param.
        X_train, X_test, y_train, y_test: Data splits (pandas objects; rows
            are selected with ``.iloc``).
        preprocessor: Unfitted preprocessing step placed before the classifier.
        cv: Number of stratified folds.
        n_trials: Number of Optuna trials.

    Returns:
        None. All results are logged to MLflow.
    """
    experiment_name = f"{model_name}"
    mlflow.set_experiment(experiment_name)

    with mlflow.start_run(run_name=f"{model_name}__{dataset_name}__study_run"):
        mlflow.set_tag("model_type", model_name)
        mlflow.set_tag("dataset", dataset_name)
        mlflow.set_tag("experiment_level", "test")

        def objective(trial):
            trial_params = param_space_fn(trial)
            threshold = trial.suggest_float("threshold", 0.1, 0.5, step=0.01)

            with mlflow.start_run(nested=True):
                mlflow.set_tag("model_type", model_name)
                mlflow.set_tag("dataset", dataset_name)
                mlflow.set_tag("experiment_level", "trial")
                mlflow.set_tag("trial_number", trial.number)
                mlflow.log_param("threshold", threshold)
                mlflow.log_param("cv_folds", cv)
                mlflow.log_param("dataset_name", dataset_name)
                mlflow.log_param("n_trials", n_trials)
                for name, value in _loggable_params(trial_params).items():
                    mlflow.log_param(name, value)

                pipeline = _build_pipeline(
                    model_class, preprocessor, trial_params
                )
                cv_metrics = _cross_validate(
                    pipeline, X_train, y_train, threshold, cv
                )

                for name in _CV_METRIC_NAMES:
                    mlflow.log_metric(name, cv_metrics[name])
                combined_score = _combined_score(
                    cv_metrics["recall"], cv_metrics["precision"]
                )
                mlflow.log_metric("combined_score", combined_score)

                log_mlflow_models(
                    X_train,
                    pipeline,
                    "model",
                )

                return combined_score

        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=n_trials, n_jobs=1)

        best_threshold = study.best_params["threshold"]
        # study.best_params holds the *sampled* names (e.g. "solver_penalty",
        # "positive_class_weight"), not the pipeline parameters derived from
        # them. Replaying the param space on a FixedTrial rebuilds the exact
        # dict the best trial used, so the final model keeps its solver,
        # penalty and class weights.
        final_params = param_space_fn(
            optuna.trial.FixedTrial(study.best_params)
        )

        mlflow.log_param("cv_folds", cv)
        mlflow.log_param("dataset_name", dataset_name)
        mlflow.log_param("n_trials", n_trials)
        mlflow.log_param("threshold", best_threshold)
        final_logged = _loggable_params(final_params)
        # Keep the "pos_weight" key even for param spaces without class weights.
        final_logged.setdefault(
            "pos_weight", study.best_params.get("positive_class_weight")
        )
        for name, value in final_logged.items():
            mlflow.log_param(name, value)

        pipeline = _build_pipeline(model_class, preprocessor, final_params)
        pipeline.fit(X_train, y_train)

        y_proba = pipeline.predict_proba(X_test)[:, 1]
        y_pred = (y_proba > best_threshold).astype(int)

        metrics, cm = evaluate_model(y_test, y_pred, y_proba)
        # Report the hold-out combined score with the objective's weights.
        metrics["combined_score"] = _combined_score(
            metrics["recall"], metrics["precision"]
        )
        for key, value in metrics.items():
            mlflow.log_metric(key, value)

        img_path = plot_confusion_matrix_heatmap(
            cm, labels=[0, 1], title=experiment_name
        )
        try:
            mlflow.log_artifact(img_path)
        finally:
            # Remove the temporary image even if the upload fails.
            os.remove(img_path)

        report_str = classification_report(y_test, y_pred)
        report_path_txt = "classification_report.txt"
        try:
            with open(report_path_txt, "w") as f:
                f.write(report_str)
            mlflow.log_artifact(report_path_txt)
        finally:
            if os.path.exists(report_path_txt):
                os.remove(report_path_txt)

        log_mlflow_models(
            X_train,
            pipeline,
            "model_final",
        )


# Param space

In [20]:
def xgboost_param_space(
    trial: optuna.trial.Trial,
    n_estimators_range: Tuple[int, int],
    max_depth_range: Tuple[int, int],
    learning_rate_range: Tuple[float, float],
    subsample_range: Tuple[float, float],
    colsample_bytree_range: Tuple[float, float],
    gamma_range: Tuple[float, float],
    reg_alpha_range: Tuple[float, float],
    reg_lambda_range: Tuple[float, float],
    scale_pos_weight_range: Tuple[float, float],
) -> Dict[str, Any]:
    """
    Sample one gradient-boosting hyperparameter configuration from an Optuna trial.

    Every ``*_range`` argument is a ``(low, high)`` pair unpacked into the
    matching ``trial.suggest_*`` call.

    Args:
        trial: Optuna trial object used to sample hyperparameters.
        n_estimators_range: Range for the number of boosting rounds (integer).
        max_depth_range: Range for maximum tree depth (integer).
        learning_rate_range: Range for learning rate (sampled on a log scale).
        subsample_range: Range for row subsampling.
        colsample_bytree_range: Range for feature subsampling.
        gamma_range: Range for minimum loss reduction to split a node.
        reg_alpha_range: L1 regularization term.
        reg_lambda_range: L2 regularization term.
        scale_pos_weight_range: Range for the positive-class weight (class imbalance correction).

    Returns:
        A dictionary whose keys are prefixed with ``classifier__`` so it can be
        passed to ``Pipeline.set_params``.
    """
    # (parameter name, suggest method, range, extra keyword arguments), listed
    # in the order in which the values are requested from the trial.
    search_space = (
        ("n_estimators", trial.suggest_int, n_estimators_range, {}),
        ("max_depth", trial.suggest_int, max_depth_range, {}),
        ("learning_rate", trial.suggest_float, learning_rate_range, {"log": True}),
        ("subsample", trial.suggest_float, subsample_range, {}),
        ("colsample_bytree", trial.suggest_float, colsample_bytree_range, {}),
        ("gamma", trial.suggest_float, gamma_range, {}),
        ("reg_alpha", trial.suggest_float, reg_alpha_range, {}),
        ("reg_lambda", trial.suggest_float, reg_lambda_range, {}),
        ("scale_pos_weight", trial.suggest_float, scale_pos_weight_range, {}),
    )

    params = {}
    for short_name, suggest, bounds, extra in search_space:
        key = f"classifier__{short_name}"
        params[key] = suggest(key, *bounds, **extra)
    return params


# Run

## Logistic Regression


In [21]:
def logistic_regression_param_space(
    trial: optuna.trial.Trial,
    class_weight_range: Tuple[float, float],
    max_iter_range: Tuple[int, int],
    tol_range: Tuple[float, float],
    C_range: Tuple[float, float],
    l1_ratio_range: Tuple[float, float],
) -> Dict[str, Any]:
    """
    Suggests a hyperparameter configuration for a LogisticRegression classifier using Optuna.

    Solver and penalty are sampled jointly (as one ``"solver__penalty"``
    categorical named ``solver_penalty``) so that only the combinations listed
    below are drawn. The positive-class weight is sampled as
    ``positive_class_weight`` and turned into ``{0: 1, 1: weight}``.

    Args:
        trial: Optuna trial object used to sample hyperparameters.
        class_weight_range: Range for the positive class weight (e.g., (1, 100)).
        max_iter_range: Range for the maximum number of iterations.
        tol_range: Range for solver tolerance (sampled on a log scale).
        C_range: Range for the inverse of regularization strength (sampled on a log scale).
        l1_ratio_range: Range for L1 mixing ratio (only sampled if penalty is "elasticnet").

    Returns:
        A dictionary of hyperparameters with keys prefixed by 'classifier__', ready to be passed to a scikit-learn Pipeline.
    """
    valid_combos = [
        "liblinear__l1",
        "liblinear__l2",
        "lbfgs__l2",
        "saga__l1",
        "saga__l2",
        "saga__elasticnet",
    ]
    combo = trial.suggest_categorical("solver_penalty", valid_combos)
    solver, penalty = combo.split("__")

    pos_weight = trial.suggest_float(
        "positive_class_weight", *class_weight_range
    )

    params = {
        "classifier__solver": solver,
        "classifier__penalty": penalty,
        "classifier__max_iter": trial.suggest_int(
            "classifier__max_iter", *max_iter_range
        ),
        "classifier__tol": trial.suggest_float(
            "classifier__tol", *tol_range, log=True
        ),
        "classifier__class_weight": {0: 1, 1: pos_weight},
        # Every combination above carries a penalty (``penalty`` is a string
        # produced by split(), never None), so C is always sampled.
        "classifier__C": trial.suggest_float(
            "classifier__C", *C_range, log=True
        ),
    }

    if penalty == "elasticnet":
        params["classifier__l1_ratio"] = trial.suggest_float(
            "classifier__l1_ratio", *l1_ratio_range
        )

    return params


# Bind the search ranges so the resulting callable takes only `trial`,
# which is how run_experiment invokes its param_space_fn.
lr_param_space = partial(
    logistic_regression_param_space,
    class_weight_range=(1, 100),
    max_iter_range=(100, 300),
    tol_range=(1e-4, 1e-2),
    C_range=(0.01, 10),
    l1_ratio_range=(0.0, 1.0),
)


In [22]:
# Build the splits and the unfitted preprocessor on the full table:
# columns with >= 50% missing values are dropped and training-set outliers
# (outside the 5% / 95% quantiles) are replaced with NaN.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data,
    target_column="TARGET",
    categorical_strategy="most_frequent",
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    test_size=0.2,
    random_state=42,
    encode_categoricals=True,
    drop_missing_columns=True,
    missing_col_threshold=0.5,
    polynomial_features=False,
)

# Tune and evaluate LogisticRegression; everything is logged to MLflow.
# NOTE(review): "mising" in dataset_name looks like a typo of "missing"; it is a
# logged tag/param value, so it is left unchanged here.
run_experiment(
    model_name="LogisticRegression_v2",
    model_class=LogisticRegression,
    param_space_fn=lr_param_space,
    dataset_name="application_train_managed_outliers_mising_cols",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    preprocessor=preproc,
    cv=5,
    n_trials=30,
)


🧹 Dropping 41 column(s) with missing ratio ≥ 0.5:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIVINGAPARTMENTS_MEDI
 - NONLIVINGAREA_MEDI
 - FONDKAPREMONT_MODE
 - HOUSETYPE_MODE
 - WALLSMATERIAL_MODE
Found 67 numerical features
Found 4 binary features
Found 9 categorical features


[I 2025-05-08 17:20:46,419] A new study created in memory with name: no-name-feec3b71-cf4e-4226-860f-01c7c01447c0
[I 2025-05-08 17:21:03,958] Trial 0 finished with value: 0.8160500084049103 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 75.27809140575872, 'classifier__max_iter': 219, 'classifier__tol': 0.009424595824117364, 'classifier__C': 0.02367591651201649, 'threshold': 0.38}. Best is trial 0 with value: 0.8160500084049103.


🏃 View run vaunted-wolf-202 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/68e330edaf154ff899074f6ef0e5caf5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:24:12,089] Trial 1 finished with value: 0.7847562346138246 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 68.59983341023863, 'classifier__max_iter': 134, 'classifier__tol': 0.00705112661923377, 'classifier__C': 0.16863825214590725, 'threshold': 0.23}. Best is trial 0 with value: 0.8160500084049103.


🏃 View run dazzling-koi-576 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/c08f268d1fce430798c78646d25d1c8a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:27:22,126] Trial 2 finished with value: 0.6325934007018018 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 53.85226301332476, 'classifier__max_iter': 229, 'classifier__tol': 0.00026380870494884364, 'classifier__C': 0.04463000374297966, 'threshold': 0.19}. Best is trial 0 with value: 0.8160500084049103.


🏃 View run hilarious-grub-937 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/8e9542459aef4e40a969702dfd0e7cd8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:27:41,009] Trial 3 finished with value: 0.5446776806464679 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 1.8295040009262657, 'classifier__max_iter': 100, 'classifier__tol': 0.00017413097931206538, 'classifier__C': 0.046411279469258396, 'threshold': 0.14}. Best is trial 0 with value: 0.8160500084049103.


🏃 View run adorable-dove-239 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/381e62bb9bfe4d55927dce13e91334f3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:27:59,830] Trial 4 finished with value: 0.8161085287889263 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 18.875860163364145, 'classifier__max_iter': 280, 'classifier__tol': 0.001262164458140034, 'classifier__C': 0.044384142471275076, 'threshold': 0.1}. Best is trial 4 with value: 0.8161085287889263.


🏃 View run gifted-snipe-963 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/f6392c09efc94d08840ef108be9e4803
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:28:24,399] Trial 5 finished with value: 0.36649114103367586 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 3.7724351430333343, 'classifier__max_iter': 179, 'classifier__tol': 0.0034831754453284606, 'classifier__C': 0.01873803164975516, 'threshold': 0.33999999999999997}. Best is trial 4 with value: 0.8161085287889263.


🏃 View run fun-carp-30 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/4b239526471c428797032b5e189aed8f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:28:50,048] Trial 6 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 60.31843919303913, 'classifier__max_iter': 222, 'classifier__tol': 0.0024381060808598537, 'classifier__C': 0.02087077363285046, 'threshold': 0.11}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run unequaled-colt-311 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/9bc9414e9dac407d8d6a006ec1ec8258
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:32:39,072] Trial 7 finished with value: 0.7618996141216764 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 73.63121520297534, 'classifier__max_iter': 273, 'classifier__tol': 0.00027158919495352834, 'classifier__C': 0.043581946734786985, 'threshold': 0.22}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run sincere-grub-33 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/9c220c7eb51a4626a60d9675a3e3d038
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:32:59,123] Trial 8 finished with value: 0.8161150973878043 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 93.3864559105265, 'classifier__max_iter': 272, 'classifier__tol': 0.0003278096609715369, 'classifier__C': 0.014414924849348922, 'threshold': 0.38}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run calm-horse-323 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/e25df1943b9c46fcbfcc417cb6a94754
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:33:19,011] Trial 9 finished with value: 0.8152067824048318 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 34.66899248936894, 'classifier__max_iter': 119, 'classifier__tol': 0.006563736553417774, 'classifier__C': 3.131812726002029, 'threshold': 0.29000000000000004}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run welcoming-hawk-822 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/c26e4ae4bd1b49df93aeb9c445db3daa
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:34:17,149] Trial 10 finished with value: 0.8065406231671688 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 43.36173257975757, 'classifier__max_iter': 171, 'classifier__tol': 0.001299841338869746, 'classifier__C': 1.0997018329030088, 'threshold': 0.48}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run luxuriant-sheep-97 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1d6788f1736b41d4b52f732387402ee2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:34:58,420] Trial 11 finished with value: 0.8161210796351662 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 98.68390783273335, 'classifier__max_iter': 247, 'classifier__tol': 0.000540831411815111, 'classifier__C': 0.01052865338329766, 'threshold': 0.43000000000000005}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run classy-fowl-712 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/fed1cce89f6a44a3a8881e6a96515bca
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:35:19,446] Trial 12 finished with value: 0.8154426904364 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 91.82564222052022, 'classifier__max_iter': 230, 'classifier__tol': 0.002287756904363146, 'classifier__C': 0.18692587181786002, 'threshold': 0.48}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run placid-cub-681 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6579235c3eba492f8dc728390991099b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:40:45,039] Trial 13 finished with value: 0.6997902586690317 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 98.91000537949003, 'classifier__max_iter': 252, 'classifier__tol': 0.0006637135043063524, 'classifier__C': 0.8137920272384012, 'classifier__l1_ratio': 0.1829748745148425, 'threshold': 0.4}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run rebellious-ram-939 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/e14494971b1747b390d86e8c22d83b5c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:41:27,477] Trial 14 finished with value: 0.8161147688349291 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 58.076135508452296, 'classifier__max_iter': 189, 'classifier__tol': 0.0006080129495526598, 'classifier__C': 0.0116082014292787, 'threshold': 0.29000000000000004}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run nosy-turtle-252 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/e292187c5d9747479f5ed7abfa7ef8c9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:41:51,651] Trial 15 finished with value: 0.8157922064242211 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 79.9250335011487, 'classifier__max_iter': 246, 'classifier__tol': 0.002432009381183368, 'classifier__C': 0.11261867820260266, 'threshold': 0.43000000000000005}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run righteous-roo-645 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/c410070c4a3b4fec9ad55bfd194f63ad
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:43:09,753] Trial 16 finished with value: 0.8161124693712944 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 31.389538071587616, 'classifier__max_iter': 203, 'classifier__tol': 0.0006780908059445819, 'classifier__C': 0.38142522374380805, 'threshold': 0.16}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run resilient-mink-410 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/df40e3a765f74d92862c59da8ddddc01
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:48:33,120] Trial 17 finished with value: 0.6703030452795475 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 61.67197127589943, 'classifier__max_iter': 248, 'classifier__tol': 0.002047961891867144, 'classifier__C': 6.294770228583062, 'threshold': 0.25}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run indecisive-goose-194 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/c38f57088e74490d9897e3d4ba82de6c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:55:10,753] Trial 18 finished with value: 0.7748732933840198 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 83.58161996471621, 'classifier__max_iter': 293, 'classifier__tol': 0.00012187963310784157, 'classifier__C': 0.08351680821364096, 'classifier__l1_ratio': 0.9391307413851608, 'threshold': 0.33}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run unequaled-kite-593 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/73a9f8b362b84852bd1aadc0aaece7a0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:55:31,333] Trial 19 finished with value: 0.8136019471371361 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 48.84247318885505, 'classifier__max_iter': 153, 'classifier__tol': 0.004034369826881938, 'classifier__C': 0.010372882314584275, 'threshold': 0.44000000000000006}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run powerful-newt-316 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/252af8f3fd6a45719491bdd00f8cdd75
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:56:34,120] Trial 20 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 88.65828597590667, 'classifier__max_iter': 209, 'classifier__tol': 0.00046007119876903844, 'classifier__C': 0.023340520010300722, 'threshold': 0.1}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run peaceful-dog-466 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/b56a79ab109d4b17b6193512cb690d0d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:57:31,497] Trial 21 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 88.24367104069887, 'classifier__max_iter': 207, 'classifier__tol': 0.00045015247124735033, 'classifier__C': 0.026095035725941275, 'threshold': 0.1}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run polite-skink-196 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/93a10c100dd84fbe99f4fd37484152fa
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:58:40,851] Trial 22 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 85.46485728862096, 'classifier__max_iter': 210, 'classifier__tol': 0.00036195470242528357, 'classifier__C': 0.07651336951625051, 'threshold': 0.1}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run classy-calf-714 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/34ffec16e5e340e5bae5d01bfe4b017d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:59:10,574] Trial 23 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 64.72200839489528, 'classifier__max_iter': 165, 'classifier__tol': 0.0012767320737596612, 'classifier__C': 0.025657532958980555, 'threshold': 0.15000000000000002}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run thoughtful-gull-576 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6c80a73d426949d394716f36e0b6abca
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 17:59:48,480] Trial 24 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 88.1557466578755, 'classifier__max_iter': 193, 'classifier__tol': 0.0009688129364693687, 'classifier__C': 0.026504973875513545, 'threshold': 0.13}. Best is trial 6 with value: 0.8161458163978155.


🏃 View run bustling-newt-36 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/8b75ca8cfedb40e0be1403a88b707028
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:02:10,219] Trial 25 finished with value: 0.8161468009721939 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 72.30745049568824, 'classifier__max_iter': 216, 'classifier__tol': 0.00043353307778173445, 'classifier__C': 0.34974079344847664, 'threshold': 0.19}. Best is trial 25 with value: 0.8161468009721939.


🏃 View run gifted-ram-221 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/78892307746d43df8de1098a56729a30
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:02:36,637] Trial 26 finished with value: 0.8161460132950807 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 70.23752162449007, 'classifier__max_iter': 231, 'classifier__tol': 0.00020366019044843118, 'classifier__C': 0.44114648507851545, 'threshold': 0.19}. Best is trial 25 with value: 0.8161468009721939.


🏃 View run learned-lynx-673 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/910c1d18d12f491f9d232b9e07e28270
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:03:00,552] Trial 27 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 69.60171402669354, 'classifier__max_iter': 230, 'classifier__tol': 0.00017725798676093674, 'classifier__C': 0.5371331406384289, 'threshold': 0.18}. Best is trial 25 with value: 0.8161468009721939.


🏃 View run honorable-fox-397 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1cb14983caa7492f95bed1ed15746bba
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:03:28,978] Trial 28 finished with value: 0.8161492956615869 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 48.42436926035391, 'classifier__max_iter': 259, 'classifier__tol': 0.00011706644533153366, 'classifier__C': 1.2278071166987328, 'threshold': 0.2}. Best is trial 28 with value: 0.8161492956615869.


🏃 View run gentle-eel-345 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/421311dc069d4aabbe540315ecb5c120
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:03:53,326] Trial 29 finished with value: 0.8160870886545237 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 42.64209245641346, 'classifier__max_iter': 258, 'classifier__tol': 0.00013028247465705976, 'classifier__C': 1.6187277598893888, 'threshold': 0.25}. Best is trial 28 with value: 0.8161492956615869.


🏃 View run mysterious-wren-735 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/4c1e57a6313346268402c62241c0e015
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470
🏃 View run LogisticRegression_v2__application_train_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/d363fffcb68d43c7b19d7b9cf68d078f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


In [23]:
# Experiment: LogisticRegression_v2 on a narrow column slice with
# polynomial_features switched on in the preparation step.
poly_subset_columns = [
    "EXT_SOURCE_1",
    "EXT_SOURCE_2",
    "EXT_SOURCE_3",
    "DAYS_BIRTH",
    "TARGET",
]

# Keyword options forwarded unchanged to prepare_data (defined earlier in the
# notebook).
# NOTE(review): the recorded output of this cell reports EXT_SOURCE_1 being
# dropped by the missing-column filter (ratio >= 0.5), leaving 3 numerical
# features -- confirm that losing this column is intended.
poly_prep_kwargs = {
    "target_column": "TARGET",
    "categorical_strategy": "most_frequent",
    "outlier_strategy": "replace_with_nan",
    "outlier_threshold": 0.05,
    "test_size": 0.2,
    "random_state": 42,
    "encode_categoricals": True,
    "drop_missing_columns": True,
    "missing_col_threshold": 0.5,
    "polynomial_features": True,
}

# Rebinds the notebook-level split/preprocessor names used by the run below.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data[poly_subset_columns], **poly_prep_kwargs
)

# Arguments for the tuning run; the dataset label is kept exactly as logged
# so the run stays grouped with the earlier ones in the tracking UI.
poly_run_kwargs = {
    "model_name": "LogisticRegression_v2",
    "model_class": LogisticRegression,
    "param_space_fn": lr_param_space,
    "dataset_name": "application_train_poly_managed_outliers_mising_cols",
    "X_train": X_train,
    "X_test": X_test,
    "y_train": y_train,
    "y_test": y_test,
    "preprocessor": preproc,
    "cv": 5,
    "n_trials": 30,
}

# Left as the final bare expression so the cell displays whatever
# run_experiment returns, as before.
run_experiment(**poly_run_kwargs)


🧹 Dropping 1 column(s) with missing ratio ≥ 0.5:
 - EXT_SOURCE_1
Found 3 numerical features
Found 0 binary features
Found 0 categorical features


[I 2025-05-08 18:04:04,580] A new study created in memory with name: no-name-a9501c76-c901-4326-9705-4478a2b2c6aa
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 18:04:59,210] Trial 0 finished with value: 0.6356962964966312 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 35.15493939800958, 'classifier__max_iter': 199, 'classifier__tol': 0.00027616323427928483, 'classifier__C': 0.09789395814667179, 'classifier__l1_ratio': 0.964429288270654, 'threshold': 0.48}. Best is trial 0 with value: 0.6356962964966312.


🏃 View run classy-goose-795 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/72e751d499ae4d9d894d8e4ebecbf207
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:03,508] Trial 1 finished with value: 0.8070177191074539 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 38.41049077824202, 'classifier__max_iter': 280, 'classifier__tol': 0.00011962411244648284, 'classifier__C': 0.10706978679235937, 'threshold': 0.49}. Best is trial 1 with value: 0.8070177191074539.


🏃 View run melodic-rat-747 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6c894e39ba414429b29b4f130f029326
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:10,276] Trial 2 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 89.02430342387945, 'classifier__max_iter': 118, 'classifier__tol': 0.0021495014163028098, 'classifier__C': 0.29774511259148406, 'threshold': 0.21000000000000002}. Best is trial 2 with value: 0.8161458163978155.


🏃 View run agreeable-fox-520 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/281363781abc4c9a8e1275b39df8ba50
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:18,097] Trial 3 finished with value: 0.8159226395151495 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 32.85583912001472, 'classifier__max_iter': 254, 'classifier__tol': 0.005235526421299742, 'classifier__C': 0.010340345755691068, 'threshold': 0.28}. Best is trial 2 with value: 0.8161458163978155.


🏃 View run merciful-loon-531 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/05c83d587b51443ea581da21acd8bc97
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:22,786] Trial 4 finished with value: 0.7646346908663554 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 3.8423927335357138, 'classifier__max_iter': 249, 'classifier__tol': 0.005060261248221017, 'classifier__C': 0.16039291699464867, 'threshold': 0.14}. Best is trial 2 with value: 0.8161458163978155.


🏃 View run orderly-shrew-755 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6fa74117e9c047ca95e911dd70175f36
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:26,563] Trial 5 finished with value: 0.8104788800887806 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 19.88813996533406, 'classifier__max_iter': 225, 'classifier__tol': 0.0011761646727873282, 'classifier__C': 0.07969704379408894, 'threshold': 0.31}. Best is trial 2 with value: 0.8161458163978155.


🏃 View run mercurial-frog-800 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6eae89838fe84482b0f190537e3fe338
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:32,374] Trial 6 finished with value: 0.8161051152237395 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 33.00219194844441, 'classifier__max_iter': 152, 'classifier__tol': 0.00018366697485021002, 'classifier__C': 7.5924165573572235, 'threshold': 0.23}. Best is trial 2 with value: 0.8161458163978155.


🏃 View run incongruous-bass-330 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/3c34de0e7c7b4134b1978cb2e5ecbab7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:39,048] Trial 7 finished with value: 0.8161472604542902 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 74.63693224745766, 'classifier__max_iter': 277, 'classifier__tol': 0.006432102604706401, 'classifier__C': 0.027744899105623413, 'threshold': 0.42000000000000004}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run resilient-dolphin-25 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/8458784cac8b480bbb49fd9425dd368a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:05:43,210] Trial 8 finished with value: 0.8161471948200898 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 19.49006502144734, 'classifier__max_iter': 170, 'classifier__tol': 0.00010397621701891894, 'classifier__C': 0.07255474232058078, 'threshold': 0.16}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run capricious-koi-226 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/4a0c9e03b6624fed9c1d5db136349195
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:06:53,184] Trial 9 finished with value: 0.8002179393544074 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 40.86607549219745, 'classifier__max_iter': 256, 'classifier__tol': 0.00013683721766704498, 'classifier__C': 0.2531853192334051, 'classifier__l1_ratio': 0.65525314871031, 'threshold': 0.2}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run nervous-sloth-654 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/ff59fe77fb2a48dbb66010ef4aebf4ea
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:08:04,947] Trial 10 finished with value: 0.8040859763099663 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 74.77990569633742, 'classifier__max_iter': 299, 'classifier__tol': 0.00848337193759063, 'classifier__C': 0.012046736134188446, 'threshold': 0.39}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run amazing-penguin-100 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1aa2589b1f4241bab9fcbecff6d0a7ce
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:08:08,476] Trial 11 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 62.94892906271454, 'classifier__max_iter': 188, 'classifier__tol': 0.00046966021353083624, 'classifier__C': 0.025364824244295203, 'threshold': 0.11}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run capable-eel-583 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/23a55f55cd7c45d6be1b71de33d2ed47
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:08:33,194] Trial 12 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 99.29939481518292, 'classifier__max_iter': 161, 'classifier__tol': 0.0006420403879883595, 'classifier__C': 1.3875852538928752, 'threshold': 0.39}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run upset-roo-468 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/2f82be4f141d49cd9a60e0b83a6753b3
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:01,056] Trial 13 finished with value: 0.7604825358350231 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 58.55928232475615, 'classifier__max_iter': 110, 'classifier__tol': 0.0020928788229440964, 'classifier__C': 0.03346048165057614, 'threshold': 0.39}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run awesome-ape-256 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/7c7a05fefed34fb487435d03b1690e17
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:34,507] Trial 14 finished with value: 0.5846024365733253 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 75.74772935650897, 'classifier__max_iter': 169, 'classifier__tol': 0.002034001591256998, 'classifier__C': 0.9055627368384964, 'threshold': 0.33999999999999997}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run grandiose-bass-280 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/631c79916cf54d81ae955ab1cadcba29
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:38,447] Trial 15 finished with value: 0.42896149001802014 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 7.131970243039762, 'classifier__max_iter': 227, 'classifier__tol': 0.00034357090994817943, 'classifier__C': 0.03782797271758524, 'threshold': 0.43000000000000005}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run worried-goat-195 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1b47b014f2d7401088448afef188a145
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:43,000] Trial 16 finished with value: 0.8161462758372184 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 19.032575079380297, 'classifier__max_iter': 138, 'classifier__tol': 0.0009444708547285972, 'classifier__C': 0.6234608801791225, 'threshold': 0.15000000000000002}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run stately-hen-304 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/0d3c9094314a44d5b4e849317a682cf2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:47,607] Trial 17 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 50.3350002274488, 'classifier__max_iter': 219, 'classifier__tol': 0.009964234206109866, 'classifier__C': 0.050248102961820154, 'threshold': 0.29000000000000004}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run painted-hound-693 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/64ad006e9ddf4a8b80c7e0a9a9b22f15
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:09:50,866] Trial 18 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 76.51682974506639, 'classifier__max_iter': 182, 'classifier__tol': 0.0037849808087193153, 'classifier__C': 0.025047653693754987, 'threshold': 0.33999999999999997}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run marvelous-kite-168 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/700379fe84364dd18898a78bda8ab819
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:10:49,046] Trial 19 finished with value: 0.8159610386909272 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 63.02813362420033, 'classifier__max_iter': 129, 'classifier__tol': 0.0011431007339598107, 'classifier__C': 2.606595518954715, 'threshold': 0.44000000000000006}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run capable-flea-284 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/28c30c59ae634bb9ba54b474fbaa5328
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:11:50,805] Trial 20 finished with value: 0.4469457854202922 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 19.684809385820603, 'classifier__max_iter': 281, 'classifier__tol': 0.0002652493718633339, 'classifier__C': 0.015513650422575824, 'threshold': 0.25}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run calm-hen-998 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/139efe91862d453c9688507955576ad8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:11:55,163] Trial 21 finished with value: 0.8161462758372184 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 19.115998125670863, 'classifier__max_iter': 147, 'classifier__tol': 0.000687787787359321, 'classifier__C': 0.35893502807053956, 'threshold': 0.15000000000000002}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run languid-boar-326 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6b799cc6c9fc4421a1a20b0472c4a66d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:11:59,659] Trial 22 finished with value: 0.8157301963988427 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 14.698395929562736, 'classifier__max_iter': 133, 'classifier__tol': 0.0028794521258135196, 'classifier__C': 0.7415771512811742, 'threshold': 0.16}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run worried-grub-528 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/a2b97103d91844cab027704a29bb60c8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:12:04,066] Trial 23 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 47.559278408507154, 'classifier__max_iter': 144, 'classifier__tol': 0.0008217963088574632, 'classifier__C': 0.497066475731099, 'threshold': 0.11}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run glamorous-fawn-83 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/58ac8a2e0dae472f847cd0f67a8ab225
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:12:08,095] Trial 24 finished with value: 0.8161467353033037 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 24.069702531917063, 'classifier__max_iter': 102, 'classifier__tol': 0.001486423588950695, 'classifier__C': 0.1956645152563514, 'threshold': 0.19}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run skittish-gnu-122 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/f60b0f5036464027b3b324716317be6d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 18:12:42,532] Trial 25 finished with value: 0.5237399710874934 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 26.762236992230285, 'classifier__max_iter': 173, 'classifier__tol': 0.006471012892797077, 'classifier__C': 0.06100846118109804, 'threshold': 0.19}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run bittersweet-slug-709 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/e9e846da2250470ba2646fdc95df9675
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:12:46,308] Trial 26 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 86.83458988069798, 'classifier__max_iter': 207, 'classifier__tol': 0.0016725636808954058, 'classifier__C': 0.17445876175229644, 'threshold': 0.25}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run resilient-mule-628 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/8e40510444904099a307cfd0d666d6ce
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:12:54,507] Trial 27 finished with value: 0.8094513519672096 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 9.326497012822784, 'classifier__max_iter': 100, 'classifier__tol': 0.003359296555652349, 'classifier__C': 0.12966217156811327, 'classifier__l1_ratio': 0.014035292424570689, 'threshold': 0.18}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run learned-deer-327 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/4ec5f160181847a8b37626b146eb2216
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:13:29,713] Trial 28 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 45.01902412729113, 'classifier__max_iter': 234, 'classifier__tol': 0.00047321398011669413, 'classifier__C': 0.018936845494739472, 'threshold': 0.1}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run thundering-bear-830 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/e6e15d0732984e94b0c1e09cbbdd3f78
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:14:20,392] Trial 29 finished with value: 0.640622306784919 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 26.063744560637495, 'classifier__max_iter': 207, 'classifier__tol': 0.0002060960682548401, 'classifier__C': 0.07501685333694649, 'classifier__l1_ratio': 0.09163143188100564, 'threshold': 0.45000000000000007}. Best is trial 7 with value: 0.8161472604542902.


🏃 View run learned-fowl-338 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/76e319d8022a493ba1f4d0811a4abd77
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


🏃 View run LogisticRegression_v2__application_train_poly_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/bef7f1abb6fd43c0bed2be7aca0ebe2e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


In [24]:
# Ratio features computed row by row from the raw application columns.
# `assign` returns a new frame, so `data` itself is left unmodified and the
# four new columns are appended in the order listed here.
data_feat_engineer = data.assign(
    CREDIT_INCOME_PERCENT=data["AMT_CREDIT"] / data["AMT_INCOME_TOTAL"],
    ANNUITY_INCOME_PERCENT=data["AMT_ANNUITY"] / data["AMT_INCOME_TOTAL"],
    CREDIT_TERM=data["AMT_ANNUITY"] / data["AMT_CREDIT"],
    DAYS_EMPLOYED_PERCENT=data["DAYS_EMPLOYED"] / data["DAYS_BIRTH"],
)

# Split / preprocess the feature-engineered frame with the same settings as
# the other runs in this notebook, so only the input features differ.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data_feat_engineer,
    target_column="TARGET",
    # Train/test split.
    test_size=0.2,
    random_state=42,
    # Column pruning on missing ratio.
    drop_missing_columns=True,
    missing_col_threshold=0.5,
    # Outlier and categorical handling.
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    categorical_strategy="most_frequent",
    encode_categoricals=True,
    # No polynomial expansion for this run.
    polynomial_features=False,
)


# Hyperparameter search for the logistic regression on this dataset variant.
run_experiment(
    model_name="LogisticRegression_v2",
    dataset_name="application_train_feat_engineer_managed_outliers_mising_cols",
    model_class=LogisticRegression,
    param_space_fn=lr_param_space,
    preprocessor=preproc,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    cv=5,
    n_trials=30,
)


🧹 Dropping 41 column(s) with missing ratio ≥ 0.5:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIVINGAPARTMENTS_MEDI
 - NONLIVINGAREA_MEDI
 - FONDKAPREMONT_MODE
 - HOUSETYPE_MODE
 - WALLSMATERIAL_MODE
Found 71 numerical features
Found 4 binary features
Found 9 categorical features


[I 2025-05-08 18:14:23,779] A new study created in memory with name: no-name-9dad4dcb-d9fc-4326-8808-26b26bcdb34f
[I 2025-05-08 18:14:44,443] Trial 0 finished with value: 0.8158604068660228 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 52.61278804145748, 'classifier__max_iter': 267, 'classifier__tol': 0.003948745923383017, 'classifier__C': 0.05924972164895547, 'threshold': 0.32}. Best is trial 0 with value: 0.8158604068660228.


🏃 View run colorful-dolphin-874 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/2b768a26e624483c8dca1566fd2db710
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:19:43,247] Trial 1 finished with value: 0.8161467353113083 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 79.71316824680582, 'classifier__max_iter': 232, 'classifier__tol': 0.0001173442964559351, 'classifier__C': 0.17726641933196757, 'threshold': 0.19}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run incongruous-cat-483 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/5a6ee9f7c30a4262bf81a581b06c9560
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:24:45,389] Trial 2 finished with value: 0.7814594645117338 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 85.12661980377865, 'classifier__max_iter': 223, 'classifier__tol': 0.008229742688123884, 'classifier__C': 0.023027285642691474, 'threshold': 0.21000000000000002}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run nosy-trout-118 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/8639e47207304aef878b7fc22844fcdb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:25:02,776] Trial 3 finished with value: 0.8161463414927653 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 82.36019004774091, 'classifier__max_iter': 171, 'classifier__tol': 0.009949625161707755, 'classifier__C': 0.25927768205920493, 'threshold': 0.14}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run redolent-cub-851 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/0f4111168e5b4312b9af14b532f5c115
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:25:22,837] Trial 4 finished with value: 0.8161458163978155 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 83.10507423270737, 'classifier__max_iter': 278, 'classifier__tol': 0.006003323003139463, 'classifier__C': 0.04205275931054834, 'threshold': 0.28}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run trusting-moth-437 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1ede4a734b4f46b09ea62e55c1c88f41
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:25:44,201] Trial 5 finished with value: 0.8094238503265585 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 38.51049101218031, 'classifier__max_iter': 231, 'classifier__tol': 0.00034857333143365295, 'classifier__C': 1.0044297896480527, 'threshold': 0.43000000000000005}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run overjoyed-elk-120 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/03f3576c46c847638bbdfc7ab5cdab87
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:26:06,511] Trial 6 finished with value: 0.8109362594675693 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 30.03805180237654, 'classifier__max_iter': 135, 'classifier__tol': 0.0003263715316762506, 'classifier__C': 0.04292346664090696, 'threshold': 0.35}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run sneaky-vole-205 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/b846d14e3d114a0d95d5f67f42659fb8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:26:25,733] Trial 7 finished with value: 0.8158709688445753 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 94.4331874586464, 'classifier__max_iter': 278, 'classifier__tol': 0.00189625325818474, 'classifier__C': 0.05387737339874811, 'threshold': 0.48}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run skittish-smelt-933 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/ae4bf3028dbc4fd584d5578657863724
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:27:05,077] Trial 8 finished with value: 0.8146925880327164 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 17.192445415649743, 'classifier__max_iter': 219, 'classifier__tol': 0.005650732208961106, 'classifier__C': 4.111691897557642, 'threshold': 0.17}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run casual-newt-941 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6863ff7d350b482688f5ac989edf3f21
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:27:24,542] Trial 9 finished with value: 0.8150573151666858 and parameters: {'solver_penalty': 'lbfgs__l2', 'positive_class_weight': 62.68446475970236, 'classifier__max_iter': 154, 'classifier__tol': 0.0007220009805031511, 'classifier__C': 0.03625572331587008, 'threshold': 0.43000000000000005}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run abundant-cat-108 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/52abb17959784582840a0db3c408cd8c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:30:04,123] Trial 10 finished with value: 0.7985666323575635 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 66.0918867758399, 'classifier__max_iter': 111, 'classifier__tol': 0.00012547666137273684, 'classifier__C': 0.2973861310043387, 'classifier__l1_ratio': 0.3964319375067312, 'threshold': 0.1}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run fortunate-quail-565 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1c9f341df74445e5a29cae0be0f3c925
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:30:39,135] Trial 11 finished with value: 0.816146275858565 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 78.08071751680879, 'classifier__max_iter': 181, 'classifier__tol': 0.0016651623637513374, 'classifier__C': 0.31204235422243687, 'threshold': 0.1}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run classy-perch-312 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/d559b3353e9f4186b6b1b84d18cfe9fd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:33:18,284] Trial 12 finished with value: 0.7443464097793124 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 95.77172931903682, 'classifier__max_iter': 183, 'classifier__tol': 0.00011409249017151078, 'classifier__C': 1.0974816243373562, 'threshold': 0.19}. Best is trial 1 with value: 0.8161467353113083.


🏃 View run serious-hawk-835 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/dd7d1e1b517145909664fb63039d521b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:34:31,169] Trial 13 finished with value: 0.8161481795385841 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 69.4116609952529, 'classifier__max_iter': 246, 'classifier__tol': 0.0005282041315206512, 'classifier__C': 0.1529241320285082, 'threshold': 0.24000000000000002}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run learned-swan-54 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/ad7f011501c647dc8c509f2214d75db6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:35:19,805] Trial 14 finished with value: 0.8161479825719297 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 64.27775254340226, 'classifier__max_iter': 254, 'classifier__tol': 0.00034730430336525815, 'classifier__C': 0.01074861815864113, 'threshold': 0.25}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run monumental-gull-334 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/16416eac02f2429b9d2b77a26a9ad84d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:35:40,502] Trial 15 finished with value: 0.8161090541240954 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 60.06586412333933, 'classifier__max_iter': 250, 'classifier__tol': 0.00043753204854317645, 'classifier__C': 0.013761652843346565, 'threshold': 0.25}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run dazzling-shrew-65 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/89bbbabe11fa4f1da11bdd26d37e0978
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:39:51,297] Trial 16 finished with value: 0.6423257446794138 and parameters: {'solver_penalty': 'saga__l2', 'positive_class_weight': 39.89307620738532, 'classifier__max_iter': 299, 'classifier__tol': 0.0007466513689959463, 'classifier__C': 0.010940739893628003, 'threshold': 0.25}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run hilarious-sow-514 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/ca88481cd501462ab240c1308ed39fdb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:40:41,919] Trial 17 finished with value: 0.3272595689389925 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 3.7233427740537195, 'classifier__max_iter': 252, 'classifier__tol': 0.0002041019372459659, 'classifier__C': 0.10759702862109555, 'threshold': 0.36}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run shivering-crab-178 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/41d9d00ed1e7449c834e09f03cd92840
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:45:17,390] Trial 18 finished with value: 0.7297489917276769 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 67.23761340617494, 'classifier__max_iter': 205, 'classifier__tol': 0.0013870866259377455, 'classifier__C': 1.4583726497491858, 'classifier__l1_ratio': 0.9459902528734156, 'threshold': 0.25}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run wistful-shark-885 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/f3cd426269d548788a5584e3c487d692
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:47:44,700] Trial 19 finished with value: 0.8157410732177075 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 48.75876981833293, 'classifier__max_iter': 297, 'classifier__tol': 0.0005263191918502491, 'classifier__C': 4.161205052116344, 'threshold': 0.30000000000000004}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run exultant-ox-815 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/b3e022cb72cc44e0bc0cf84cdb64c805
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:49:25,160] Trial 20 finished with value: 0.8158615279879107 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 72.51892174882926, 'classifier__max_iter': 247, 'classifier__tol': 0.0002221017517191498, 'classifier__C': 0.09555275069712647, 'threshold': 0.39}. Best is trial 13 with value: 0.8161481795385841.


🏃 View run big-slug-983 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/2f7dfb0ce98f47b5b8bf3e27cbaa504d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 18:53:24,826] Trial 21 finished with value: 0.8161506746363849 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 52.91223765930975, 'classifier__max_iter': 238, 'classifier__tol': 0.00020612291615086805, 'classifier__C': 0.15270941836911636, 'threshold': 0.22}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run powerful-moth-890 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/7797b266ec6a4461a3b86c0492334f66
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:00:22,742] Trial 22 finished with value: 0.8160718993936715 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 51.711463422991805, 'classifier__max_iter': 203, 'classifier__tol': 0.0002521209327103034, 'classifier__C': 0.5583173787563078, 'threshold': 0.23}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run thoughtful-calf-254 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/13daab7383f74fa480d41398055083a7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:03:28,685] Trial 23 finished with value: 0.8161462758345503 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 56.85678136817351, 'classifier__max_iter': 266, 'classifier__tol': 0.00017469636974426083, 'classifier__C': 0.10418632544065426, 'threshold': 0.15000000000000002}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run carefree-bass-295 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/aec61bcf15c54d048636ae3556c7c320
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:04:29,302] Trial 24 finished with value: 0.8157467497657891 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 40.2644689688278, 'classifier__max_iter': 245, 'classifier__tol': 0.000615370787053484, 'classifier__C': 0.550331315027004, 'threshold': 0.27}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run redolent-panda-236 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/7f2a9db4bece452eb4226653746aafbb
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:05:00,823] Trial 25 finished with value: 0.8161120095293525 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 71.71097452812114, 'classifier__max_iter': 210, 'classifier__tol': 0.0009804643567052468, 'classifier__C': 0.020390155610556224, 'threshold': 0.31}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run overjoyed-grouse-679 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/7b5f9663db314d0c9b1b483be189269a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:10:58,868] Trial 26 finished with value: 0.6701649451076173 and parameters: {'solver_penalty': 'saga__elasticnet', 'positive_class_weight': 47.23877680951412, 'classifier__max_iter': 270, 'classifier__tol': 0.00037684807915649136, 'classifier__C': 2.2966023529267923, 'classifier__l1_ratio': 0.08358349320114522, 'threshold': 0.21000000000000002}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run salty-eel-856 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/286fe8b3480d431585357079f272ccb0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:11:27,340] Trial 27 finished with value: 0.8128278335848453 and parameters: {'solver_penalty': 'liblinear__l2', 'positive_class_weight': 24.57535819360223, 'classifier__max_iter': 239, 'classifier__tol': 0.00028667627667853753, 'classifier__C': 0.16014433177302023, 'threshold': 0.28}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run unruly-conch-299 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/1d7324672d254a479db75e30d541aee7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:17:22,620] Trial 28 finished with value: 0.7437367044815095 and parameters: {'solver_penalty': 'saga__l1', 'positive_class_weight': 70.34216489666085, 'classifier__max_iter': 260, 'classifier__tol': 0.0001780410989660625, 'classifier__C': 0.4885308285828133, 'threshold': 0.22}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run capricious-worm-399 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/6603d757d10748948f76e5eba883f81a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


[I 2025-05-08 19:17:48,622] Trial 29 finished with value: 0.8157180082709857 and parameters: {'solver_penalty': 'liblinear__l1', 'positive_class_weight': 56.42508915326846, 'classifier__max_iter': 287, 'classifier__tol': 0.002736138039970028, 'classifier__C': 7.939823021412841, 'threshold': 0.35}. Best is trial 21 with value: 0.8161506746363849.


🏃 View run dazzling-goat-372 at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/4ea5f0f8dec44c82911d2d365ad69288
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470
🏃 View run LogisticRegression_v2__application_train_feat_engineer_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/579090426674828470/runs/c30992d21a664cfdbb235289e4df351b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/579090426674828470


## Random Forest

In [25]:
def random_forest_param_space(
    trial: optuna.trial.Trial,
    class_weight_range: Tuple[float, float],
    n_estimators_range: Tuple[int, int],
    max_depth_range: Tuple[int, int],
    min_samples_split_range: Tuple[int, int],
    min_samples_leaf_range: Tuple[int, int],
    max_features_range: Tuple,
    bootstrap_range: Tuple,
) -> Dict[str, Any]:
    """
    Sample one RandomForestClassifier configuration from an Optuna trial.

    Args:
        trial: Optuna trial object the values are drawn from.
        class_weight_range: (low, high) bounds for the weight given to class 1
            (e.g., (1, 100)); class 0 always gets weight 1.
        n_estimators_range: (low, high) bounds for the number of trees.
        max_depth_range: (low, high) bounds for the tree depth.
        min_samples_split_range: (low, high) bounds for the minimum number of
            samples required to split an internal node.
        min_samples_leaf_range: (low, high) bounds for the minimum number of
            samples required at a leaf node.
        max_features_range: Candidate values for ``max_features``, handed to
            ``trial.suggest_categorical`` as the list of choices.
        bootstrap_range: Candidate values for ``bootstrap``, handed to
            ``trial.suggest_categorical`` as the list of choices.

    Returns:
        A dictionary keyed with the ``classifier__`` prefix (scikit-learn
        Pipeline parameter naming) holding the sampled values plus the
        ``{0: 1, 1: weight}`` class-weight mapping.
    """
    # The class weight is drawn first; the order of the suggest calls is kept
    # fixed so a seeded sampler sees the parameters in the same sequence.
    minority_weight = trial.suggest_float(
        "positive_class_weight", *class_weight_range
    )

    integer_bounds = (
        ("classifier__n_estimators", n_estimators_range),
        ("classifier__max_depth", max_depth_range),
        ("classifier__min_samples_split", min_samples_split_range),
        ("classifier__min_samples_leaf", min_samples_leaf_range),
    )
    categorical_choices = (
        ("classifier__max_features", max_features_range),
        ("classifier__bootstrap", bootstrap_range),
    )

    suggested: Dict[str, Any] = {}
    for key, bounds in integer_bounds:
        suggested[key] = trial.suggest_int(key, *bounds)
    for key, choices in categorical_choices:
        suggested[key] = trial.suggest_categorical(key, choices)

    # Not an Optuna parameter itself: built from the sampled positive weight.
    suggested["classifier__class_weight"] = {0: 1, 1: minority_weight}
    return suggested


# Random-forest search space with every bound fixed, so the resulting
# callable only needs the Optuna trial.
rf_param_space = partial(
    random_forest_param_space,
    # Forest size and tree shape.
    n_estimators_range=(20, 100),
    max_depth_range=(3, 8),
    min_samples_leaf_range=(1, 10),
    min_samples_split_range=(2, 10),
    # Single-choice lists: these two settings are effectively pinned.
    max_features_range=["sqrt"],
    bootstrap_range=[True],
    # Bounds for the sampled weight of class 1.
    class_weight_range=(1, 100),
)


In [26]:
# Rebuild the split / preprocessor from the raw frame (no engineered ratio
# columns) before tuning the random forest.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data,
    target_column="TARGET",
    # Train/test split.
    test_size=0.2,
    random_state=42,
    # Column pruning on missing ratio.
    drop_missing_columns=True,
    missing_col_threshold=0.5,
    # Outlier and categorical handling.
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    categorical_strategy="most_frequent",
    encode_categoricals=True,
    # No polynomial expansion for this run.
    polynomial_features=False,
)

# Hyperparameter search for the random forest on the baseline dataset.
run_experiment(
    model_name="RandomForestClassifier_v2",
    dataset_name="application_train_managed_outliers_mising_cols",
    model_class=RandomForestClassifier,
    param_space_fn=rf_param_space,
    preprocessor=preproc,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    cv=5,
    n_trials=30,
)


🧹 Dropping 41 column(s) with missing ratio ≥ 0.5:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIVINGAPARTMENTS_MEDI
 - NONLIVINGAREA_MEDI
 - FONDKAPREMONT_MODE
 - HOUSETYPE_MODE
 - WALLSMATERIAL_MODE
Found 67 numerical features
Found 4 binary features
Found 9 categorical features


2025/05/08 19:17:58 INFO mlflow.tracking.fluent: Experiment with name 'RandomForestClassifier_v2' does not exist. Creating a new experiment.
[I 2025-05-08 19:17:58,544] A new study created in memory with name: no-name-9df3b84a-c81b-4e16-b6f0-5258c9aef4e8
[I 2025-05-08 19:18:32,891] Trial 0 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 73.158598707212, 'classifier__n_estimators': 92, 'classifier__max_depth': 4, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.12000000000000001}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run enchanting-finch-839 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/298076c30fef4ac697ccc9006c1d33de
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:18:54,988] Trial 1 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 17.021426650421997, 'classifier__n_estimators': 46, 'classifier__max_depth': 3, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.33}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run agreeable-elk-867 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/52ff586c8ca64435a7f8f722f599f8dd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:19:34,950] Trial 2 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 51.06943671682048, 'classifier__n_estimators': 78, 'classifier__max_depth': 7, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.11}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run skillful-stoat-113 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0a343cec6b994587b3abbe46f48b3241
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:20:04,610] Trial 3 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 26.643855625055927, 'classifier__n_estimators': 52, 'classifier__max_depth': 6, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.29000000000000004}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run resilient-hare-841 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/5ed07e219a49478bb0ab51dca471fd5e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:20:30,557] Trial 4 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 46.92652318224698, 'classifier__n_estimators': 34, 'classifier__max_depth': 7, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.21000000000000002}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run wise-grub-955 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6b9a1d3fe527443a918c05cc3e7fde46
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:21:05,903] Trial 5 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 34.139562806075034, 'classifier__n_estimators': 99, 'classifier__max_depth': 4, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.5}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run upset-foal-25 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/7e6dc124121b4fa4ad0d98f50cb6ff59
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:21:45,130] Trial 6 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 85.90830521466395, 'classifier__n_estimators': 84, 'classifier__max_depth': 6, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.19}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run overjoyed-owl-498 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/4d944490f4be4d4a8bd17bcf2dbfd23e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:22:07,983] Trial 7 finished with value: 0.8161356925831864 and parameters: {'positive_class_weight': 12.110170282339004, 'classifier__n_estimators': 28, 'classifier__max_depth': 6, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.29000000000000004}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run exultant-sow-142 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/22fa90b746f8402b923637baad454e39
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:22:51,699] Trial 8 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 63.245910339226164, 'classifier__n_estimators': 87, 'classifier__max_depth': 7, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.26}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run colorful-jay-52 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/abda21b139d94f7a97496b3cc16a7872
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:23:21,011] Trial 9 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 12.63333958709192, 'classifier__n_estimators': 49, 'classifier__max_depth': 6, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.22}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run industrious-cow-553 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6f4537c2fbd7470b969a2454e0ce5462
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:23:50,086] Trial 10 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 90.67526869337568, 'classifier__n_estimators': 68, 'classifier__max_depth': 4, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run wistful-tern-879 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/ee916ba1b4194451b72beec81c405da2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:24:11,728] Trial 11 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 73.30580190620489, 'classifier__n_estimators': 42, 'classifier__max_depth': 3, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.37}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run shivering-duck-9 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/55f44ab87e1e48e38a30bb4a6b0b8cd6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:24:37,535] Trial 12 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 72.43570021846946, 'classifier__n_estimators': 67, 'classifier__max_depth': 3, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.37}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run rumbling-snipe-850 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/252866378f0f48b0a79cb0d488d612e8
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:25:04,543] Trial 13 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 97.95418032071406, 'classifier__n_estimators': 58, 'classifier__max_depth': 4, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.39}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run brawny-mole-110 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/f269645495d34de7b193b844ae4f710d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 19:25:29,600] Trial 14 finished with value: 0.0 and parameters: {'positive_class_weight': 2.2871352159022855, 'classifier__n_estimators': 39, 'classifier__max_depth': 5, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45000000000000007}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run masked-sow-168 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a4e6d9588560476c81568a15a72d67b0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:26:01,079] Trial 15 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 38.68379439576977, 'classifier__n_estimators': 100, 'classifier__max_depth': 3, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.17}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run agreeable-owl-625 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0017a93482df45469ef6cade41821e27
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:26:20,591] Trial 16 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 62.39541903576869, 'classifier__n_estimators': 20, 'classifier__max_depth': 5, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.33999999999999997}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run luminous-kit-449 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/c8165dc0fd75442ba8925955ad48bfba
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:26:50,179] Trial 17 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 23.384425768714095, 'classifier__n_estimators': 71, 'classifier__max_depth': 4, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.43000000000000005}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run worried-donkey-987 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/bb5d3802941d49bca68dc79e1a66dc9a
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:27:37,428] Trial 18 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 50.27033207209973, 'classifier__n_estimators': 90, 'classifier__max_depth': 8, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.14}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run whimsical-turtle-145 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/80fc6d7fe7274f1fa5e2518e1a2b3cd6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:28:02,047] Trial 19 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 77.977401589138, 'classifier__n_estimators': 56, 'classifier__max_depth': 3, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.24000000000000002}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run painted-swan-224 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/104d46016e6b4ab9a54b587494284db0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:28:27,979] Trial 20 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 62.140510995661415, 'classifier__n_estimators': 44, 'classifier__max_depth': 5, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.33}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run unruly-horse-810 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/bb3f58610b7c40cf94a7cf2f9343da52
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:29:12,532] Trial 21 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 41.658850601058276, 'classifier__n_estimators': 77, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run marvelous-shoat-70 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b0554ae6b3cb41f6a815847a64245349
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:29:53,725] Trial 22 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 55.79277013966652, 'classifier__n_estimators': 80, 'classifier__max_depth': 7, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.15000000000000002}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run adorable-hound-241 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6d904c7daec6486582423bb12bbefc84
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:30:27,865] Trial 23 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 54.45594875683711, 'classifier__n_estimators': 92, 'classifier__max_depth': 4, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.13}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run kindly-ram-982 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/f8ce77e526314ea796e7b6dcdeb2a5c2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:30:55,259] Trial 24 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 30.67432506819533, 'classifier__n_estimators': 75, 'classifier__max_depth': 3, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.17}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run worried-conch-620 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b54cead6b2734852a5260449a6d525d4
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:31:26,992] Trial 25 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 83.82711241369678, 'classifier__n_estimators': 66, 'classifier__max_depth': 5, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.27}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run grandiose-mule-41 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/550d1c6789b64b81b37576c80dea0940
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:32:12,642] Trial 26 finished with value: 0.8161102362088646 and parameters: {'positive_class_weight': 19.359172133158506, 'classifier__n_estimators': 94, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.32}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run wise-mink-483 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/bb59d1da4ed34ccfad24319ab74c2030
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:32:44,879] Trial 27 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 67.42514700828647, 'classifier__n_estimators': 83, 'classifier__max_depth': 4, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.12000000000000001}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run dashing-roo-562 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/14b36aa18286416c952f977b4f984dbf
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:33:22,935] Trial 28 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 45.00637099105667, 'classifier__n_estimators': 63, 'classifier__max_depth': 8, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.19}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run capricious-squid-244 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/bec306a22fe9442b809b30d81b05ff1d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 19:33:46,539] Trial 29 finished with value: 0.0 and parameters: {'positive_class_weight': 2.2869881851729588, 'classifier__n_estimators': 49, 'classifier__max_depth': 3, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.29000000000000004}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run fun-gnu-355 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/da9046fb24f349c2af1dde73d6bcf251
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691
🏃 View run RandomForestClassifier_v2__application_train_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/85b718324b97472788ca2033bb6f2a51
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


In [27]:
# Experiment: random forest trained on polynomial features built from a small,
# hand-picked set of columns rather than the full application table.
# TARGET stays in the selection because prepare_data is told to use it as the
# target column.
poly_columns = [
    "EXT_SOURCE_1",
    "EXT_SOURCE_2",
    "EXT_SOURCE_3",
    "DAYS_BIRTH",
    "TARGET",
]

# NOTE(review): the output recorded for this cell reports EXT_SOURCE_1 being
# dropped (missing ratio >= 0.5) and only 3 numerical features being found.
# If all three EXT_SOURCE columns are meant to feed the polynomial expansion,
# revisit drop_missing_columns / missing_col_threshold for this run.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data[poly_columns],
    target_column="TARGET",
    # --- train/test split ---
    test_size=0.2,
    random_state=42,
    # --- missing values and outliers ---
    drop_missing_columns=True,
    missing_col_threshold=0.5,
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    # --- categorical handling ---
    categorical_strategy="most_frequent",
    encode_categoricals=True,
    # --- feature expansion (the point of this experiment) ---
    polynomial_features=True,
)


# Launch the hyper-parameter study for this dataset variant. The recorded
# output shows the parent tracking run being named from model_name and
# dataset_name, so dataset_name is what tells this run apart from the others.
run_experiment(
    # what to train and how to search
    model_name="RandomForestClassifier_v2",
    model_class=RandomForestClassifier,
    param_space_fn=rf_param_space,
    # data produced by the prepare_data call above
    dataset_name="application_train_poly_managed_outliers_mising_cols",
    preprocessor=preproc,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    # search budget
    cv=5,
    n_trials=30,
)


🧹 Dropping 1 column(s) with missing ratio ≥ 0.5:
 - EXT_SOURCE_1
Found 3 numerical features
Found 0 binary features
Found 0 categorical features


[I 2025-05-08 19:34:04,911] A new study created in memory with name: no-name-f474e3ce-847f-46d7-96c2-e2c61cb3fa7c
[I 2025-05-08 19:34:22,529] Trial 0 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 28.563206398980054, 'classifier__n_estimators': 41, 'classifier__max_depth': 3, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.27}. Best is trial 0 with value: 0.8161458163978155.


🏃 View run useful-rat-747 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/26aae9f4180149b884aca1dbc35f7056
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:35:02,947] Trial 1 finished with value: 0.8161464727958555 and parameters: {'positive_class_weight': 55.45954596578046, 'classifier__n_estimators': 49, 'classifier__max_depth': 7, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.18}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run judicious-mouse-315 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a9b1a8e006a14b5ab050fb979814c034
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:35:57,496] Trial 2 finished with value: 0.7467184063441803 and parameters: {'positive_class_weight': 11.748333560290652, 'classifier__n_estimators': 75, 'classifier__max_depth': 6, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.33}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run youthful-eel-133 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/36bba9957a6a4b78b6b581ba99649c83
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:36:35,961] Trial 3 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 72.93483394578463, 'classifier__n_estimators': 97, 'classifier__max_depth': 3, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.21000000000000002}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run selective-lark-872 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/3229defccad3462393f394a795c8fb9d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:37:11,734] Trial 4 finished with value: 0.5562393606890716 and parameters: {'positive_class_weight': 9.926301461812756, 'classifier__n_estimators': 57, 'classifier__max_depth': 5, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.47}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run dapper-crow-932 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/9832140230ab4e0ba039d0d6cb282621
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:37:53,375] Trial 5 finished with value: 0.717224327795186 and parameters: {'positive_class_weight': 10.96168886008273, 'classifier__n_estimators': 58, 'classifier__max_depth': 6, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.35}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run zealous-duck-342 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/52130792070c4c07a06c4e84089a8167
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:39:00,268] Trial 6 finished with value: 0.800454167076058 and parameters: {'positive_class_weight': 40.98942639256253, 'classifier__n_estimators': 84, 'classifier__max_depth': 7, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.49}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run powerful-newt-32 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/83b42acbcd3a47ad83a4a36823369471
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:39:37,376] Trial 7 finished with value: 0.8161462758745751 and parameters: {'positive_class_weight': 78.90149475287258, 'classifier__n_estimators': 72, 'classifier__max_depth': 4, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.49}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run orderly-bat-447 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0f224ff4846a4fa8a478c1dd65e79a31
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:40:40,737] Trial 8 finished with value: 0.8161458820320159 and parameters: {'positive_class_weight': 76.77761092563715, 'classifier__n_estimators': 90, 'classifier__max_depth': 6, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.18}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run stately-roo-534 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/73dc3ead30a74617a7aaff670c323d72
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:40:56,681] Trial 9 finished with value: 0.4235686738809406 and parameters: {'positive_class_weight': 7.255430647637531, 'classifier__n_estimators': 35, 'classifier__max_depth': 3, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45999999999999996}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run intrigued-wasp-911 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/2a002d9d7dbf43be8042dee62c728c87
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:41:20,150] Trial 10 finished with value: 0.8161461445821587 and parameters: {'positive_class_weight': 99.82312779588756, 'classifier__n_estimators': 24, 'classifier__max_depth': 8, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run charming-sheep-932 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/5da51c88bb274dd8b08b8540e9c82ea2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:41:55,913] Trial 11 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 64.92489126912585, 'classifier__n_estimators': 70, 'classifier__max_depth': 4, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.39}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run painted-bee-302 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/3b669e0f633b47f9beaecfce18ad48e2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:42:39,844] Trial 12 finished with value: 0.8161461445955002 and parameters: {'positive_class_weight': 53.634861751546424, 'classifier__n_estimators': 48, 'classifier__max_depth': 8, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run salty-roo-494 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/ca689f7e2de6441e9be8a1e7c98ad33d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:43:22,780] Trial 13 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 89.67882142012796, 'classifier__n_estimators': 69, 'classifier__max_depth': 5, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.24000000000000002}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run adventurous-doe-552 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/d991ca6737e24e7c82c0c0ac3dd8a03e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:44:01,933] Trial 14 finished with value: 0.8161057059413586 and parameters: {'positive_class_weight': 52.00157057892222, 'classifier__n_estimators': 47, 'classifier__max_depth': 7, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.16}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run abrasive-donkey-750 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/98f489e8f9ce40f2a93b2f3911bbbd1c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:44:41,357] Trial 15 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 83.51890356011712, 'classifier__n_estimators': 77, 'classifier__max_depth': 4, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.41000000000000003}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run chill-frog-106 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/876bd87816764d98abd9455e4df09988
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:45:31,971] Trial 16 finished with value: 0.8157589787853048 and parameters: {'positive_class_weight': 38.41469635286866, 'classifier__n_estimators': 62, 'classifier__max_depth': 7, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.27}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run delicate-perch-596 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/45dfe8f7558a437badbc78f9eabd830f
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:45:49,097] Trial 17 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 63.071205746928804, 'classifier__n_estimators': 30, 'classifier__max_depth': 4, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.15000000000000002}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run likeable-snipe-966 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/aabe806992d240648c45b084b8aa5fb7
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:46:23,326] Trial 18 finished with value: 0.8160691420033314 and parameters: {'positive_class_weight': 63.14856182955046, 'classifier__n_estimators': 54, 'classifier__max_depth': 5, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.41000000000000003}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run hilarious-fawn-789 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/2a158245fa8c4be0a53af2c96219379d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:47:17,703] Trial 19 finished with value: 0.8159121835179307 and parameters: {'positive_class_weight': 93.09888095252313, 'classifier__n_estimators': 66, 'classifier__max_depth': 7, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.32}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run sedate-bear-399 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/4236063fd9cb4658bb454562c6f9cf13
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:47:58,979] Trial 20 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 40.75460067302807, 'classifier__n_estimators': 82, 'classifier__max_depth': 4, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.22}. Best is trial 1 with value: 0.8161464727958555.


🏃 View run intelligent-tern-989 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b80ef3a9d780434ca00a4b180ea4c7cd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:48:42,690] Trial 21 finished with value: 0.8161466041256366 and parameters: {'positive_class_weight': 53.2224679814509, 'classifier__n_estimators': 47, 'classifier__max_depth': 8, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 21 with value: 0.8161466041256366.


🏃 View run spiffy-conch-189 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/ef547592aea647eda236f62cc872c4cf
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:49:28,106] Trial 22 finished with value: 0.8160675652199798 and parameters: {'positive_class_weight': 30.998458799544792, 'classifier__n_estimators': 49, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.14}. Best is trial 21 with value: 0.8161466041256366.


🏃 View run upbeat-colt-120 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/18390fcb15894fd7a4d58352acc1e23e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:50:05,149] Trial 23 finished with value: 0.8161465384487361 and parameters: {'positive_class_weight': 72.69215023816022, 'classifier__n_estimators': 39, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.13}. Best is trial 21 with value: 0.8161466041256366.


🏃 View run orderly-snipe-812 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/124b55ae44554ac4803487cbefca7eae
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:50:41,035] Trial 24 finished with value: 0.8161469323606896 and parameters: {'positive_class_weight': 56.94472624924934, 'classifier__n_estimators': 38, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.12000000000000001}. Best is trial 24 with value: 0.8161469323606896.


🏃 View run mercurial-mole-78 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b836be3627e1494dbd191c319bf64b6c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:51:02,311] Trial 25 finished with value: 0.8161064939332601 and parameters: {'positive_class_weight': 70.29497487928819, 'classifier__n_estimators': 20, 'classifier__max_depth': 8, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.12000000000000001}. Best is trial 24 with value: 0.8161469323606896.


🏃 View run serious-fish-58 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a025cdfd516645d6b9013a1669ecc5f9
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:51:38,468] Trial 26 finished with value: 0.8161474575757581 and parameters: {'positive_class_weight': 46.52154220757578, 'classifier__n_estimators': 38, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.13}. Best is trial 26 with value: 0.8161474575757581.


🏃 View run skillful-snake-561 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0719593846d0407b8a8f81d92c217414
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:52:08,557] Trial 27 finished with value: 0.815949533652345 and parameters: {'positive_class_weight': 46.21023208829837, 'classifier__n_estimators': 31, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.18}. Best is trial 26 with value: 0.8161474575757581.


🏃 View run smiling-hound-989 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a3623cf7b96e401aa9f1d9e84f4673c2
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:52:42,849] Trial 28 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 28.345148479257556, 'classifier__n_estimators': 41, 'classifier__max_depth': 7, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.1}. Best is trial 26 with value: 0.8161474575757581.


🏃 View run fun-sheep-821 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/3ca0156f7bb842f8a552cdee9c782746
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:53:23,269] Trial 29 finished with value: 0.813271398072315 and parameters: {'positive_class_weight': 26.413794407089352, 'classifier__n_estimators': 44, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.27}. Best is trial 26 with value: 0.8161474575757581.


🏃 View run dapper-mare-39 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/89132e1af0ed4a8883e293f76609f012
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691
🏃 View run RandomForestClassifier_v2__application_train_poly_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b80e4210d0424a91834e4d3396a55b60
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


In [28]:
# Domain ratio features built from the raw application columns.
# `assign` returns a new frame, so `data` itself is left untouched.
# NOTE(review): the public Home Credit table is known to carry a large positive
# placeholder in DAYS_EMPLOYED — confirm the outlier handling inside
# prepare_data covers it before trusting DAYS_EMPLOYED_PERCENT.
data_feat_engineer = data.assign(
    CREDIT_INCOME_PERCENT=lambda df: df["AMT_CREDIT"] / df["AMT_INCOME_TOTAL"],
    ANNUITY_INCOME_PERCENT=lambda df: df["AMT_ANNUITY"] / df["AMT_INCOME_TOTAL"],
    CREDIT_TERM=lambda df: df["AMT_ANNUITY"] / df["AMT_CREDIT"],
    DAYS_EMPLOYED_PERCENT=lambda df: df["DAYS_EMPLOYED"] / df["DAYS_BIRTH"],
)

# Split + preprocessing: polynomial expansion is switched off here because the
# hand-made ratio features above are what this experiment evaluates.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data_feat_engineer,
    target_column="TARGET",
    # split
    test_size=0.2,
    random_state=42,
    # categorical handling
    categorical_strategy="most_frequent",
    encode_categoricals=True,
    # outliers and sparse columns
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    drop_missing_columns=True,
    missing_col_threshold=0.5,
    # feature expansion
    polynomial_features=False,
)


# Random-forest hyperparameter study on the feature-engineered dataset.
run_experiment(
    model_name="RandomForestClassifier_v2",
    dataset_name="application_train_feat_engineer_managed_outliers_mising_cols",
    model_class=RandomForestClassifier,
    param_space_fn=rf_param_space,
    preprocessor=preproc,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    cv=5,
    n_trials=30,
)


🧹 Dropping 41 column(s) with missing ratio ≥ 0.5:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIVINGAPARTMENTS_MEDI
 - NONLIVINGAREA_MEDI
 - FONDKAPREMONT_MODE
 - HOUSETYPE_MODE
 - WALLSMATERIAL_MODE
Found 71 numerical features
Found 4 binary features
Found 9 categorical features


[I 2025-05-08 19:53:41,739] A new study created in memory with name: no-name-11c043b4-b394-4339-a72e-89d84a69c44a
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 19:54:06,808] Trial 0 finished with value: 0.0 and parameters: {'positive_class_weight': 1.632215279765353, 'classifier__n_estimators': 28, 'classifier__max_depth': 7, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.33}. Best is trial 0 with value: 0.0.


🏃 View run auspicious-cub-656 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/3188849f5ea94abbbdba4b801bcdcd6d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:54:42,539] Trial 1 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 67.25979081658693, 'classifier__n_estimators': 64, 'classifier__max_depth': 6, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45000000000000007}. Best is trial 1 with value: 0.8161458163978155.


🏃 View run monumental-gull-730 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6f4ce8c82bbe468e80f3b8949dcb6fcf
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
[I 2025-05-08 19:55:12,359] Trial 2 finished with value: 0.0 and parameters: {'positive_class_weight': 2.3942975841573158, 'classifier__n_estimators': 53, 'classifier__max_depth': 5, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.42000000000000004}. Best is trial 1 with value: 0.8161458163978155.


🏃 View run sedate-grouse-608 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/bdfe87ef17434a66a11474d5277ab477
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:55:42,133] Trial 3 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 61.03195783322261, 'classifier__n_estimators': 82, 'classifier__max_depth': 3, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.21000000000000002}. Best is trial 1 with value: 0.8161458163978155.


🏃 View run charming-stag-101 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/f914065f9cd34ae39ab1415cebd3c404
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:56:06,769] Trial 4 finished with value: 0.07660321404096289 and parameters: {'positive_class_weight': 5.170766842223841, 'classifier__n_estimators': 42, 'classifier__max_depth': 4, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 3, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.43000000000000005}. Best is trial 1 with value: 0.8161458163978155.


🏃 View run handsome-bass-501 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a4dd826bd3e44faeaf9b1b5afaf88e85
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:56:29,526] Trial 5 finished with value: 0.8161492956215408 and parameters: {'positive_class_weight': 34.866310417896884, 'classifier__n_estimators': 20, 'classifier__max_depth': 8, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.29000000000000004}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run loud-fox-28 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/52697f126e5d46fba51e48cad4c26b08
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:56:52,861] Trial 6 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 49.068332026786294, 'classifier__n_estimators': 38, 'classifier__max_depth': 4, 'classifier__min_samples_split': 8, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.42000000000000004}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run puzzled-moth-313 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/dca82765562847ad8fa052c18e763044
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:57:13,788] Trial 7 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 82.10385761064667, 'classifier__n_estimators': 27, 'classifier__max_depth': 4, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 1, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.11}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run shivering-crane-467 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a6d02e856616465cb7e25773b5dde3f0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:57:49,262] Trial 8 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 48.99429853759385, 'classifier__n_estimators': 87, 'classifier__max_depth': 4, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.35}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run welcoming-duck-24 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/e9872aa17a064e8f935fd36daf037629
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:58:14,653] Trial 9 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 60.97804314091279, 'classifier__n_estimators': 36, 'classifier__max_depth': 5, 'classifier__min_samples_split': 10, 'classifier__min_samples_leaf': 2, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.18}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run capable-gull-728 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0aaa4f8e5ce2411b914eab435dfe3c9e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:58:57,488] Trial 10 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 28.003264075645333, 'classifier__n_estimators': 68, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.25}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run upbeat-bass-95 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6d2e55a36e7441b8923549db4c9c7dfd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 19:59:36,650] Trial 11 finished with value: 0.8161461445661496 and parameters: {'positive_class_weight': 96.56686372144982, 'classifier__n_estimators': 65, 'classifier__max_depth': 7, 'classifier__min_samples_split': 2, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.49}. Best is trial 5 with value: 0.8161492956215408.


🏃 View run ambitious-eel-501 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/2f44f4b224104432bb9c0ca30b9b4ed5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:00:33,153] Trial 12 finished with value: 0.8161581653718329 and parameters: {'positive_class_weight': 90.45923147432534, 'classifier__n_estimators': 98, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.5}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run angry-roo-831 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b18cad891fe44c6f9be1adb5a78a0fb6
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:01:34,107] Trial 13 finished with value: 0.8161463414687509 and parameters: {'positive_class_weight': 31.158802333849025, 'classifier__n_estimators': 99, 'classifier__max_depth': 8, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.29000000000000004}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run bald-mare-214 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a5441bb7411949669a19aa1b0fe32a2b
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:02:08,009] Trial 14 finished with value: 0.8161471291645398 and parameters: {'positive_class_weight': 33.6198299247642, 'classifier__n_estimators': 52, 'classifier__max_depth': 7, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.36}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run inquisitive-gull-492 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/0100abcc7fa040d58065c67809ec1e3d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:02:54,891] Trial 15 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 96.71731497369088, 'classifier__n_estimators': 77, 'classifier__max_depth': 8, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 6, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.27}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run rare-goat-534 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6c1ad2ceaf4c46c683cd95eddea6ff8d
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:03:16,636] Trial 16 finished with value: 0.7719878603425414 and parameters: {'positive_class_weight': 17.929048319942567, 'classifier__n_estimators': 20, 'classifier__max_depth': 6, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 4, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.5}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run wise-newt-873 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/a704f0845c854d628d40a797dc59f2c0
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:04:08,585] Trial 17 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 78.88155412030787, 'classifier__n_estimators': 99, 'classifier__max_depth': 7, 'classifier__min_samples_split': 9, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.38}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run gifted-bat-71 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/ed56c78ecf2944f8ac0b019599af397c
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:04:43,135] Trial 18 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 41.586591421008684, 'classifier__n_estimators': 49, 'classifier__max_depth': 8, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.22}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run dazzling-cub-699 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/dbd143aee4c64ff3a4762839dfd0e301
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:05:28,081] Trial 19 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 82.32068131775918, 'classifier__n_estimators': 90, 'classifier__max_depth': 6, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 5, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.15000000000000002}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run able-pig-352 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/6e3e53e0d72947ba890a6bf2616b38f5
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:06:13,391] Trial 20 finished with value: 0.8159844254006386 and parameters: {'positive_class_weight': 19.932251258564868, 'classifier__n_estimators': 70, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.31}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run victorious-calf-747 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/781c8474f084413abc895aa771c1b732
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:06:47,775] Trial 21 finished with value: 0.8161480481874421 and parameters: {'positive_class_weight': 34.256851314307994, 'classifier__n_estimators': 53, 'classifier__max_depth': 7, 'classifier__min_samples_split': 4, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.37}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run nosy-crane-331 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/226fa5f9d5a546959a5336864b695a65
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:07:19,226] Trial 22 finished with value: 0.8161085288173525 and parameters: {'positive_class_weight': 38.23873991512561, 'classifier__n_estimators': 45, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.39}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run merciful-crane-926 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/b7e7f65679fb41419c2a5b9b7cac73ac
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:07:58,286] Trial 23 finished with value: 0.8156534009285407 and parameters: {'positive_class_weight': 17.692266763592613, 'classifier__n_estimators': 58, 'classifier__max_depth': 8, 'classifier__min_samples_split': 3, 'classifier__min_samples_leaf': 8, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.32}. Best is trial 12 with value: 0.8161581653718329.


🏃 View run capricious-pig-998 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/ec9f9cccc0f3463d8c30d3e0f4c9a88e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:08:42,264] Trial 24 finished with value: 0.8161809437082757 and parameters: {'positive_class_weight': 40.87479728395347, 'classifier__n_estimators': 77, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45999999999999996}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run bold-sow-130 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/68c41013d19b488595e2cc1629a12ffd
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:09:28,814] Trial 25 finished with value: 0.8159628173025671 and parameters: {'positive_class_weight': 54.34692874414321, 'classifier__n_estimators': 76, 'classifier__max_depth': 8, 'classifier__min_samples_split': 7, 'classifier__min_samples_leaf': 7, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.48}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run bold-crab-275 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/29cb1222ca40421887616aabcaa2e5ba
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:10:18,250] Trial 26 finished with value: 0.8161682298360159 and parameters: {'positive_class_weight': 42.67633887031423, 'classifier__n_estimators': 92, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45999999999999996}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run capricious-seal-2 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/185e3dd9f6ea434e90e045891eeed677
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:11:03,713] Trial 27 finished with value: 0.8161462758372184 and parameters: {'positive_class_weight': 45.85947510997665, 'classifier__n_estimators': 92, 'classifier__max_depth': 6, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45999999999999996}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run traveling-goose-957 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/e0c3b4f651cc4ce6bedb0a761ca4b9ee
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:11:49,611] Trial 28 finished with value: 0.8161466040429092 and parameters: {'positive_class_weight': 74.29760649090755, 'classifier__n_estimators': 82, 'classifier__max_depth': 7, 'classifier__min_samples_split': 6, 'classifier__min_samples_leaf': 9, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.45999999999999996}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run upset-mare-466 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/054438f47c494f40abfae14f7fad0a86
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


[I 2025-05-08 20:12:40,033] Trial 29 finished with value: 0.8161458163978155 and parameters: {'positive_class_weight': 87.44184469478907, 'classifier__n_estimators': 94, 'classifier__max_depth': 7, 'classifier__min_samples_split': 5, 'classifier__min_samples_leaf': 10, 'classifier__max_features': 'sqrt', 'classifier__bootstrap': True, 'threshold': 0.4}. Best is trial 24 with value: 0.8161809437082757.


🏃 View run funny-ape-717 at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/8abcf86da94d4b0ea2b0ea96aba1792e
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


🏃 View run RandomForestClassifier_v2__application_train_feat_engineer_managed_outliers_mising_cols__study_run at: http://127.0.0.1:8080/#/experiments/928276634314426691/runs/eb40959099a04b92a6e71690f586f362
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/928276634314426691


## LightGBM

In [32]:
def lightgbm_param_space(
    trial: "optuna.trial.Trial",
    class_weight_range: Tuple[float, float],
    n_estimators_range: Tuple[int, int],
    learning_rate_range: Tuple[float, float],
    num_leaves_range: Tuple[int, int],
    max_depth_range: Tuple[int, int],
    min_child_samples_range: Tuple[int, int],
    subsample_range: Tuple[float, float],
    colsample_bytree_range: Tuple[float, float],
    reg_alpha_range: Tuple[float, float],
    reg_lambda_range: Tuple[float, float],
    subsample_freq: int = 1,
) -> Dict[str, Any]:
    """
    Suggests a hyperparameter configuration for an LGBMClassifier using Optuna.

    Every ``*_range`` argument is a ``(low, high)`` pair forwarded to the
    matching ``trial.suggest_*`` call. The learning rate is sampled on a log
    scale; all other values are sampled uniformly.

    Args:
        trial: Optuna trial object used to sample hyperparameters.
        class_weight_range: Bounds for the weight given to class 1
            (class 0 always keeps weight 1).
        n_estimators_range: Bounds for ``n_estimators``.
        learning_rate_range: Bounds for ``learning_rate`` (log-uniform).
        num_leaves_range: Bounds for ``num_leaves``.
        max_depth_range: Bounds for ``max_depth``.
        min_child_samples_range: Bounds for ``min_child_samples``.
        subsample_range: Bounds for ``subsample``.
        colsample_bytree_range: Bounds for ``colsample_bytree``.
        reg_alpha_range: Bounds for ``reg_alpha``.
        reg_lambda_range: Bounds for ``reg_lambda``.
        subsample_freq: Fixed (not tuned) bagging frequency. LightGBM only
            applies ``subsample`` when this is non-zero, and its own default
            is 0, so it defaults to 1 here to make the sampled ``subsample``
            value effective.

    Returns:
        A dictionary of ``classifier__``-prefixed hyperparameters for a
        scikit-learn Pipeline, including the ``class_weight`` mapping.
    """
    # The suggest_* calls keep a fixed order so that trial parameter order
    # stays stable from run to run.
    pos_weight = trial.suggest_float(
        "positive_class_weight", *class_weight_range
    )

    n_estimators = trial.suggest_int(
        "classifier__n_estimators", *n_estimators_range
    )
    learning_rate = trial.suggest_float(
        "classifier__learning_rate", *learning_rate_range, log=True
    )
    num_leaves = trial.suggest_int(
        "classifier__num_leaves", *num_leaves_range
    )
    max_depth = trial.suggest_int("classifier__max_depth", *max_depth_range)
    min_child_samples = trial.suggest_int(
        "classifier__min_child_samples", *min_child_samples_range
    )
    subsample = trial.suggest_float(
        "classifier__subsample", *subsample_range
    )
    colsample_bytree = trial.suggest_float(
        "classifier__colsample_bytree", *colsample_bytree_range
    )
    reg_alpha = trial.suggest_float(
        "classifier__reg_alpha", *reg_alpha_range
    )
    reg_lambda = trial.suggest_float(
        "classifier__reg_lambda", *reg_lambda_range
    )

    return {
        "classifier__n_estimators": n_estimators,
        "classifier__learning_rate": learning_rate,
        "classifier__num_leaves": num_leaves,
        "classifier__max_depth": max_depth,
        "classifier__min_child_samples": min_child_samples,
        "classifier__subsample": subsample,
        # Without a non-zero frequency LightGBM silently skips bagging and the
        # `subsample` value sampled above would be a dead search dimension.
        "classifier__subsample_freq": subsample_freq,
        "classifier__colsample_bytree": colsample_bytree,
        "classifier__reg_alpha": reg_alpha,
        "classifier__reg_lambda": reg_lambda,
        # Only the positive class is re-weighted; the negative class stays at 1.
        "classifier__class_weight": {0: 1, 1: pos_weight},
    }


# Search space handed to run_experiment for LightGBM: every bound is frozen
# here, so the resulting callable only needs the Optuna trial.
lgb_param_space = partial(
    lightgbm_param_space,
    **{
        # class imbalance
        "class_weight_range": (1, 100),
        # boosting schedule
        "n_estimators_range": (50, 150),
        "learning_rate_range": (0.01, 0.2),
        # tree shape
        "num_leaves_range": (20, 50),
        "max_depth_range": (3, 6),
        "min_child_samples_range": (10, 50),
        # row / column sampling
        "subsample_range": (0.6, 1.0),
        "colsample_bytree_range": (0.6, 1.0),
        # L1 / L2 regularisation
        "reg_alpha_range": (0.0, 1.0),
        "reg_lambda_range": (0.0, 1.0),
    },
)


In [36]:
# LightGBM experiment on the raw application table with polynomial features
# enabled and columns with a missing ratio >= 0.4 dropped.
# NOTE(review): the saved output of this cell ends right after the Optuna study
# is created (no trial results were captured) — rerun to confirm it completes.
X_train, X_test, y_train, y_test, preproc = prepare_data(
    data,
    target_column="TARGET",
    categorical_strategy="most_frequent",
    outlier_strategy="replace_with_nan",
    outlier_threshold=0.05,
    test_size=0.2,
    random_state=42,
    encode_categoricals=True,
    drop_missing_columns=True,
    missing_col_threshold=0.4,
    polynomial_features=True,
)


run_experiment(
    model_name="LGBMClassifier_v2",
    model_class=lgb.LGBMClassifier,
    param_space_fn=lgb_param_space,
    # The label carries "poly" because polynomial_features=True above; the
    # other runs in this notebook tag that setting the same way, and an
    # untagged name would mislabel this run in MLflow.
    dataset_name="application_train_poly_managed_outliers_mising_cols",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    preprocessor=preproc,
    cv=5,
    n_trials=10,
)


🧹 Dropping 49 column(s) with missing ratio ≥ 0.4:
 - OWN_CAR_AGE
 - EXT_SOURCE_1
 - APARTMENTS_AVG
 - BASEMENTAREA_AVG
 - YEARS_BEGINEXPLUATATION_AVG
 - YEARS_BUILD_AVG
 - COMMONAREA_AVG
 - ELEVATORS_AVG
 - ENTRANCES_AVG
 - FLOORSMAX_AVG
 - FLOORSMIN_AVG
 - LANDAREA_AVG
 - LIVINGAPARTMENTS_AVG
 - LIVINGAREA_AVG
 - NONLIVINGAPARTMENTS_AVG
 - NONLIVINGAREA_AVG
 - APARTMENTS_MODE
 - BASEMENTAREA_MODE
 - YEARS_BEGINEXPLUATATION_MODE
 - YEARS_BUILD_MODE
 - COMMONAREA_MODE
 - ELEVATORS_MODE
 - ENTRANCES_MODE
 - FLOORSMAX_MODE
 - FLOORSMIN_MODE
 - LANDAREA_MODE
 - LIVINGAPARTMENTS_MODE
 - LIVINGAREA_MODE
 - NONLIVINGAPARTMENTS_MODE
 - NONLIVINGAREA_MODE
 - APARTMENTS_MEDI
 - BASEMENTAREA_MEDI
 - YEARS_BEGINEXPLUATATION_MEDI
 - YEARS_BUILD_MEDI
 - COMMONAREA_MEDI
 - ELEVATORS_MEDI
 - ENTRANCES_MEDI
 - FLOORSMAX_MEDI
 - FLOORSMIN_MEDI
 - LANDAREA_MEDI
 - LIVINGAPARTMENTS_MEDI
 - LIVINGAREA_MEDI
 - NONLIVINGAPARTMENTS_MEDI
 - NONLIVINGAREA_MEDI
 - FONDKAPREMONT_MODE
 - HOUSETYPE_MODE
 - TOTALARE

2025/05/08 23:52:14 INFO mlflow.tracking.fluent: Experiment with name 'LGBMClassifier_v2' does not exist. Creating a new experiment.
[I 2025-05-08 23:52:14,208] A new study created in memory with name: no-name-0959e83b-ea37-4f12-9c5d-71b04e02a132


: 

In [31]:
# DISABLED EXPERIMENT (kept for reference only; nothing in this cell runs).
# It would train LightGBM on just the EXT_SOURCE_* columns plus DAYS_BIRTH,
# with polynomial features enabled.
# NOTE(review): EXT_SOURCE_1 appears in the dropped-column lists printed by the
# other runs in this notebook (missing ratio >= 0.4), so with
# drop_missing_columns=True it would presumably be removed here too — confirm
# before re-enabling.
# X_train, X_test, y_train, y_test, preproc = prepare_data(
#     data[
#         ["EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "DAYS_BIRTH", "TARGET"]
#     ],
#     target_column="TARGET",
#     categorical_strategy="most_frequent",
#     outlier_strategy="replace_with_nan",
#     outlier_threshold=0.05,
#     test_size=0.2,
#     random_state=42,
#     encode_categoricals=True,
#     drop_missing_columns=True,
#     missing_col_threshold=0.4,
#     polynomial_features=True,
# )


# run_experiment(
#     model_name="LGBMClassifier",
#     model_class=lgb.LGBMClassifier,
#     param_space_fn=lgb_param_space,
#     dataset_name="application_train_poly_managed_outliers_mising_cols",
#     X_train=X_train,
#     X_test=X_test,
#     y_train=y_train,
#     y_test=y_test,
#     preprocessor=preproc,
#     cv=5,
#     n_trials=30,
# )
