In [1]:
import numpy as np
import pandas as pd
from scipy import stats

import lightgbm as lgb
from catboost import CatBoostClassifier, Pool
from sklearn.linear_model import LogisticRegression
from sklearn.base import BaseEstimator, ClassifierMixin

from sklearn.model_selection import train_test_split, StratifiedGroupKFold,\
GroupShuffleSplit, StratifiedKFold, GroupKFold
from sklearn.metrics import roc_auc_score

from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import VotingClassifier, StackingClassifier, RandomForestClassifier

from metrics import get_metrics, check_overfitting

import warnings
warnings.filterwarnings('ignore')

In [4]:
# Directory prefix prepended to the training parquet file name.
ROOT = "../"

# Seed passed as `random_state` / `random_seed` to the splitter and the models.
RAND = 10
# Passed as `test_size` when the hold-out test set is split off by WEEK_NUM groups.
N_SPLIT_TRAIN = 0.18
# NOTE(review): not referenced in the visible cells - presumably a validation share; confirm or remove.
N_SPLIT_VAL = 0.15
# Number of folds for every StratifiedGroupKFold used in this notebook.
N_FOLDS = 5

# Overview
Данные взяты из соревнования на Kaggle. Основная информация по данным представлена здесь: https://www.kaggle.com/competitions/home-credit-credit-risk-model-stability/data

Цель — предсказать, какие клиенты с большей вероятностью не смогут выплатить свои кредиты; при этом оценка должна быть стабильна с течением времени.

# Prepare data

In [5]:
# Load the prepared training table (features plus `target`, `case_id`, `WEEK_NUM`).
df_train = pd.read_parquet(f"{ROOT}train4.parquet")

In [89]:
# Metrics table saved by the earlier tuning experiments; the stacking results
# computed below are appended to it.
metrics = pd.read_csv("tuning_metrics.csv")

In [6]:
# Columns that must not be used as model inputs: the label, the row id and
# the week number (the latter is kept separately as the grouping key).
non_feature_columns = ["target", "case_id", "WEEK_NUM"]

weeks = df_train["WEEK_NUM"]
y = df_train["target"]
X = df_train.drop(columns=non_feature_columns)

In [7]:
def train_test_split_by_groups(X: pd.DataFrame,
                               y: pd.Series,
                               groups: pd.Series,
                               test_size=None,
                               train_size=None,
                               random_state=None,
                               shuffle=True):
    """
    Split features, target and group labels into train and test subsets.

    With ``shuffle=True`` the split is produced by ``GroupShuffleSplit``, so
    each group ends up entirely in either the train or the test part.
    With ``shuffle=False`` the rows are cut by position via
    ``train_test_split``; group boundaries are NOT taken into account in that
    mode, so groups stay intact only if the data is already ordered by group.

    Parameters
    ----------
    X : pd.DataFrame
        Feature matrix.
    y : pd.Series
        Target aligned row-by-row with ``X``.
    groups : pd.Series
        Group label of every row (aligned with ``X``).
    test_size, train_size : float or int, optional
        Forwarded unchanged to the underlying scikit-learn splitter.
    random_state : int, optional
        Seed forwarded to the underlying scikit-learn splitter.
    shuffle : bool, default True
        Selects the group-aware random split (True) or the positional
        split (False).

    Returns
    -------
    sequence of length 6
        ``X_train, X_test, y_train, y_test, groups_train, groups_test``.
    """
    if shuffle:
        splitter = GroupShuffleSplit(n_splits=1,
                                     test_size=test_size,
                                     train_size=train_size,
                                     random_state=random_state)
        train_idx, test_idx = next(splitter.split(X, y, groups))
        return (X.iloc[train_idx], X.iloc[test_idx],
                y.iloc[train_idx], y.iloc[test_idx],
                groups.iloc[train_idx], groups.iloc[test_idx])

    # Forward the caller's seed instead of the notebook-level RAND so the
    # function does not depend on a global defined in another cell.
    return train_test_split(X,
                            y,
                            groups,
                            test_size=test_size,
                            train_size=train_size,
                            shuffle=False,
                            random_state=random_state)

In [8]:
# Hold out a test set by WEEK_NUM groups; the remaining rows are used for
# cross-validation and for fitting the final base models.
X_train, X_test, y_train, y_test, w_train, w_test = train_test_split_by_groups(
    X, y, weeks, test_size=N_SPLIT_TRAIN, random_state=RAND)

# Report the realised share of rows in each part.
train_share = round(X_train.shape[0] / X.shape[0] * 100, 2)
test_share = round(X_test.shape[0] / X.shape[0] * 100, 2)
print(f"All train: {train_share}%")
print(f"test: {test_share}%")

All train: 79.67%
test: 20.33%


In [9]:
# One-hot encoded copy of the features (used by the RandomForest cells).
# Same groups, test_size and seed as the raw split, so the partition is
# expected to match it.
(X_train_bin, X_test_bin, y_train_bin, y_test_bin, w_train_bin,
 w_test_bin) = train_test_split_by_groups(pd.get_dummies(X),
                                          y,
                                          weeks,
                                          test_size=N_SPLIT_TRAIN,
                                          random_state=RAND)

# Report the realised share of rows in each part and the encoded test shape.
for part_label, part in (("All train", X_train_bin), ("test", X_test_bin)):
    print(f"{part_label}: {round(part.shape[0] / X.shape[0] * 100, 2)}%")
print(X_test_bin.shape)

All train: 79.67%
test: 20.33%
(310442, 661)


# Stacking hand tuning

In [10]:
# Names of the pandas-categorical columns; passed to CatBoost as `cat_features`.
category_features = list(df_train.select_dtypes(include="category").columns)

# Level-1 feature tables: each base model adds one column of predicted
# positive-class probabilities (train rows in fold order / test rows).
meta_X, meta_X_test = pd.DataFrame(), pd.DataFrame()

## LGBMClassifier tuned (300 estimators)

In [11]:
# Hyper-parameters of the tuned 300-estimator LightGBM base model.
params = {
    'n_estimators': 300,
    'learning_rate': 0.20701321225348068,
    'max_depth': 4,
    'min_child_samples': 1500,
    'reg_alpha': 0.27364853822678964,
    'reg_lambda': 0.015150234302393548,
    'colsample_bytree': 0.4585698361989257,
    'colsample_bynode': 0.8943213484326571,
    'objective': 'binary',
    'random_state': RAND,
    'verbose': -1
}

# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train,
           idx_valid) in enumerate(cv.split(X_train, y_train, groups=w_train)):
    X_train_, y_train_ = X_train.iloc[idx_train], y_train.iloc[idx_train]
    X_val, y_val = X_train.iloc[idx_valid], y_train.iloc[idx_valid]

    # Negative-to-positive ratio of this fold's training part, used as the
    # positive-class weight.
    ratio = float(np.sum(y_train_ == 0)) / np.sum(y_train_ == 1)

    model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
    model.fit(X_train_,
              y_train_,
              eval_metric="auc",
              eval_set=[(X_val, y_val)],
              callbacks=[lgb.early_stopping(100)])

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.5f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test. The class
# weight is recomputed on the full training target instead of silently
# reusing the value (and estimator object) left over from the last fold.
ratio = float(np.sum(y_train == 0)) / np.sum(y_train == 1)
model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
model.fit(X_train, y_train)
y_score_test = model.predict_proba(X_test)
print("Test ROC-AUC SCORE %.5f" % roc_auc_score(y_test, y_score_test[:, 1]))

meta_X['lgb_tuned300'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['lgb_tuned300'] = y_score_test[:, 1]

Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[295]	valid_0's auc: 0.850387	valid_0's binary_logloss: 0.457393
Fold: 1 ROC-AUC SCORE 0.85039
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[300]	valid_0's auc: 0.84576	valid_0's binary_logloss: 0.485382
Fold: 2 ROC-AUC SCORE 0.84576
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[281]	valid_0's auc: 0.840922	valid_0's binary_logloss: 0.473609
Fold: 3 ROC-AUC SCORE 0.84092
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[300]	valid_0's auc: 0.847096	valid_0's binary_logloss: 0.461914
Fold: 4 ROC-AUC SCORE 0.84710
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[299]	valid_0's auc: 0.850711	valid_0's binary_logloss:

## LGBMClassifier  tuned (1000 estimators)

In [12]:
# Hyper-parameters of the tuned 1000-estimator LightGBM base model.
params = {
    'n_estimators': 1000,
    'learning_rate': 0.201557703425373,
    'max_depth': 6,
    'min_child_samples': 59200,
    'reg_alpha': 1.5870626653704198,
    'reg_lambda': 0.08870954489127132,
    'colsample_bytree': 0.8987319208510414,
    'colsample_bynode': 0.8307150795884893,
    'objective': 'binary',
    'random_state': RAND,
    'verbose': -1
}

# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train,
           idx_valid) in enumerate(cv.split(X_train, y_train, groups=w_train)):
    X_train_, y_train_ = X_train.iloc[idx_train], y_train.iloc[idx_train]
    X_val, y_val = X_train.iloc[idx_valid], y_train.iloc[idx_valid]

    # Negative-to-positive ratio of this fold's training part, used as the
    # positive-class weight.
    ratio = float(np.sum(y_train_ == 0)) / np.sum(y_train_ == 1)

    model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
    model.fit(X_train_,
              y_train_,
              eval_metric="auc",
              eval_set=[(X_val, y_val)],
              callbacks=[lgb.early_stopping(100)])

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.5f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test. The class
# weight is recomputed on the full training target instead of silently
# reusing the value (and estimator object) left over from the last fold.
ratio = float(np.sum(y_train == 0)) / np.sum(y_train == 1)
model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
model.fit(X_train, y_train)
y_score_test = model.predict_proba(X_test)
print("Test ROC-AUC SCORE %.5f" % roc_auc_score(y_test, y_score_test[:, 1]))

meta_X['lgb_tuned1000'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['lgb_tuned1000'] = y_score_test[:, 1]

Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[818]	valid_0's auc: 0.851593	valid_0's binary_logloss: 0.445319
Fold: 1 ROC-AUC SCORE 0.85159
---
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[790]	valid_0's auc: 0.847677	valid_0's binary_logloss: 0.481517
Fold: 2 ROC-AUC SCORE 0.84768
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[976]	valid_0's auc: 0.841504	valid_0's binary_logloss: 0.460863
Fold: 3 ROC-AUC SCORE 0.84150
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[960]	valid_0's auc: 0.849501	valid_0's binary_logloss: 0.451582
Fold: 4 ROC-AUC SCORE 0.84950
---
Training until validation scores don't improve for 100 rounds
Early stopping, best iteration is:
[857]	valid_0's auc: 0.852734	valid_0's binary_logloss: 0.447533
Fold: 5 ROC-AUC SCORE 0.8527

## LGBMClassifier simple

In [13]:
# LightGBM with library defaults; only the seed and verbosity are set.
params = {
    'random_state': RAND,
    'verbose': -1
}

# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train,
           idx_valid) in enumerate(cv.split(X_train, y_train, groups=w_train)):
    X_train_, y_train_ = X_train.iloc[idx_train], y_train.iloc[idx_train]
    X_val, y_val = X_train.iloc[idx_valid], y_train.iloc[idx_valid]

    # Negative-to-positive ratio of this fold's training part, used as the
    # positive-class weight.
    ratio = float(np.sum(y_train_ == 0)) / np.sum(y_train_ == 1)

    model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
    model.fit(X_train_,
              y_train_,
              eval_metric="auc",
              eval_set=[(X_val, y_val)],
              callbacks=[lgb.early_stopping(100)])

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.5f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test. The class
# weight is recomputed on the full training target instead of silently
# reusing the value (and estimator object) left over from the last fold.
ratio = float(np.sum(y_train == 0)) / np.sum(y_train == 1)
model = lgb.LGBMClassifier(scale_pos_weight=ratio, **params)
model.fit(X_train, y_train)
y_score_test = model.predict_proba(X_test)
print("Test ROC-AUC SCORE %.5f" % roc_auc_score(y_test, y_score_test[:, 1]))

meta_X['lgb'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['lgb'] = y_score_test[:, 1]

Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's auc: 0.843382	valid_0's binary_logloss: 0.47887
Fold: 1 ROC-AUC SCORE 0.84338
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's auc: 0.840359	valid_0's binary_logloss: 0.502522
Fold: 2 ROC-AUC SCORE 0.84036
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's auc: 0.834197	valid_0's binary_logloss: 0.48793
Fold: 3 ROC-AUC SCORE 0.83420
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's auc: 0.840975	valid_0's binary_logloss: 0.480999
Fold: 4 ROC-AUC SCORE 0.84098
---
Training until validation scores don't improve for 100 rounds
Did not meet early stopping. Best iteration is:
[100]	valid_0's auc: 0.842318	valid_0's binary_logloss: 

## RandomForest Simple

In [14]:
# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train, idx_valid) in enumerate(
        cv.split(X_train_bin, y_train_bin, groups=w_train_bin)):
    X_train_ = X_train_bin.iloc[idx_train]
    y_train_ = y_train_bin.iloc[idx_train]
    X_val = X_train_bin.iloc[idx_valid]
    y_val = y_train_bin.iloc[idx_valid]

    # Class imbalance is handled by class_weight='balanced', so no explicit
    # class ratio or eval_set is needed for this estimator.
    model = RandomForestClassifier(random_state=RAND,
                                   class_weight='balanced')
    model.fit(X_train_, y_train_)

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.3f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test, rather than
# re-fitting the estimator object left over from the last fold.
model = RandomForestClassifier(random_state=RAND,
                               class_weight='balanced')
model.fit(X_train_bin, y_train_bin)

meta_X['rf'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['rf'] = model.predict_proba(X_test_bin)[:, 1]

Fold: 1 ROC-AUC SCORE 0.800
---
Fold: 2 ROC-AUC SCORE 0.794
---
Fold: 3 ROC-AUC SCORE 0.791
---
Fold: 4 ROC-AUC SCORE 0.794
---
Fold: 5 ROC-AUC SCORE 0.797
---


## RandomForest  tuned

In [15]:
# Hyper-parameters of the tuned RandomForest base model.
params = {
    'n_estimators': 274,
    'max_depth': 14,
    'bootstrap': True,
    'min_samples_leaf': 6,
    'min_samples_split': 13,
    'max_samples': 0.7006486151941755,
    'max_features': 'sqrt',
    'class_weight': 'balanced',
    'random_state': RAND,
    'n_jobs': 3,
    'verbose': 0
}

# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train, idx_valid) in enumerate(
        cv.split(X_train_bin, y_train_bin, groups=w_train_bin)):
    X_train_ = X_train_bin.iloc[idx_train]
    y_train_ = y_train_bin.iloc[idx_train]
    X_val = X_train_bin.iloc[idx_valid]
    y_val = y_train_bin.iloc[idx_valid]

    # Class imbalance is handled by class_weight='balanced' in `params`, so
    # no explicit class ratio or eval_set is needed for this estimator.
    model = RandomForestClassifier(**params)
    model.fit(X_train_, y_train_)

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.3f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test, rather than
# re-fitting the estimator object left over from the last fold.
model = RandomForestClassifier(**params)
model.fit(X_train_bin, y_train_bin)

meta_X['rf_tuned'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['rf_tuned'] = model.predict_proba(X_test_bin)[:, 1]

Fold: 1 ROC-AUC SCORE 0.814
---
Fold: 2 ROC-AUC SCORE 0.803
---
Fold: 3 ROC-AUC SCORE 0.804
---
Fold: 4 ROC-AUC SCORE 0.808
---
Fold: 5 ROC-AUC SCORE 0.811
---


## Catboost simple

In [16]:
# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train,
           idx_valid) in enumerate(cv.split(X_train, y_train, groups=w_train)):
    X_train_, y_train_ = X_train.iloc[idx_train], y_train.iloc[idx_train]
    X_val, y_val = X_train.iloc[idx_valid], y_train.iloc[idx_valid]

    # Negative-to-positive ratio of this fold's training part, used as the
    # positive-class weight.
    ratio = float(np.sum(y_train_ == 0)) / np.sum(y_train_ == 1)

    model = CatBoostClassifier(scale_pos_weight=ratio,
                               eval_metric="AUC",
                               cat_features=category_features,
                               thread_count=3,
                               random_state=RAND)
    model.fit(X_train_,
              y_train_,
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=100,
              verbose=False)

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.5f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test. The class
# weight is recomputed on the full training target instead of silently
# reusing the value (and estimator object) left over from the last fold.
ratio = float(np.sum(y_train == 0)) / np.sum(y_train == 1)
model = CatBoostClassifier(scale_pos_weight=ratio,
                           eval_metric="AUC",
                           cat_features=category_features,
                           thread_count=3,
                           random_state=RAND)
model.fit(X_train, y_train,
          verbose=False)
y_score_test = model.predict_proba(X_test)
print("Test ROC-AUC SCORE %.5f" % roc_auc_score(y_test, y_score_test[:, 1]))

meta_X['catboost'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['catboost'] = y_score_test[:, 1]

Fold: 1 ROC-AUC SCORE 0.85091
---
Fold: 2 ROC-AUC SCORE 0.84922
---
Fold: 3 ROC-AUC SCORE 0.84375
---
Fold: 4 ROC-AUC SCORE 0.84899
---
Fold: 5 ROC-AUC SCORE 0.85241
---
Test ROC-AUC SCORE 0.83597


## Catboost  tuned

In [19]:
# Hyper-parameters of the tuned CatBoost base model.
catboost_params = {
    'border_count': 254,
    'bootstrap_type': 'MVS',
    'boosting_type': 'Plain',
    'iterations': 1000,
    'l2_leaf_reg': 100,
    'grow_policy': 'SymmetricTree',
    'depth': 9,
    'random_seed': RAND,
    'learning_rate': 0.1
}

# Out-of-fold class probabilities, collected in fold order.
pred_score_val = []

cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

for fold, (idx_train,
           idx_valid) in enumerate(cv.split(X_train, y_train, groups=w_train)):
    X_train_, y_train_ = X_train.iloc[idx_train], y_train.iloc[idx_train]
    X_val, y_val = X_train.iloc[idx_valid], y_train.iloc[idx_valid]

    # Negative-to-positive ratio of this fold's training part, used as the
    # positive-class weight.
    ratio = float(np.sum(y_train_ == 0)) / np.sum(y_train_ == 1)

    model = CatBoostClassifier(scale_pos_weight=ratio,
                               eval_metric="AUC",
                               cat_features=category_features,
                               thread_count=3,
                               **catboost_params)
    model.fit(X_train_,
              y_train_,
              eval_set=[(X_val, y_val)],
              early_stopping_rounds=100,
              verbose=False)

    y_score_val = model.predict_proba(X_val)

    print("Fold:", fold + 1,
          "ROC-AUC SCORE %.5f" % roc_auc_score(y_val, y_score_val[:, 1]))
    print("---")

    # Only probabilities are needed for stacking; hard labels are not computed.
    pred_score_val.append(y_score_val)

# Fit a fresh model on the whole training set to predict on test. The class
# weight is recomputed on the full training target instead of silently
# reusing the value (and estimator object) left over from the last fold.
ratio = float(np.sum(y_train == 0)) / np.sum(y_train == 1)
model = CatBoostClassifier(scale_pos_weight=ratio,
                           eval_metric="AUC",
                           cat_features=category_features,
                           thread_count=3,
                           **catboost_params)
model.fit(X_train, y_train, verbose=False)
y_score_test = model.predict_proba(X_test)
print("Test ROC-AUC SCORE %.5f" % roc_auc_score(y_test, y_score_test[:, 1]))

meta_X['catboost_tuned'] = np.concatenate(pred_score_val)[:, 1]
meta_X_test['catboost_tuned'] = y_score_test[:, 1]

Fold: 1 ROC-AUC SCORE 0.85177
---
Fold: 2 ROC-AUC SCORE 0.84975
---
Fold: 3 ROC-AUC SCORE 0.84558
---
Fold: 4 ROC-AUC SCORE 0.85138
---
Fold: 5 ROC-AUC SCORE 0.85439
---
Test ROC-AUC SCORE 0.84503


## Final meta model

- Пересоберем y_train, чтобы он соответствовал meta_X, т.к. при кросс-валидации с использованием StratifiedGroupKFold порядок записей меняется

In [22]:
# Rebuild the training target in the same fold order in which the
# out-of-fold predictions were concatenated into meta_X. The splitter must be
# configured exactly like the one used in the base-model cells.
cv = StratifiedGroupKFold(n_splits=N_FOLDS, shuffle=False)

# Collect the validation parts first and concatenate once: this avoids
# seeding the result with a dtype-less empty Series (which can degrade the
# target to object dtype) and avoids re-copying data on every iteration.
y_train_final = pd.concat(
    [y_train.iloc[idx_valid]
     for _, idx_valid in cv.split(X_train, y_train, groups=w_train)],
    ignore_index=True)

# The target must line up row-by-row with the stacked features.
assert len(y_train_final) == len(meta_X), \
    "y_train_final and meta_X have different numbers of rows"

y_train_final.shape

(1216217,)

In [66]:
# meta_X.to_parquet("meta_X.parquet")
# meta_X_test.to_parquet("meta_X_test.parquet")
# pd.DataFrame(y_train_final).to_parquet("y_train_final.parquet")
# pd.DataFrame(y_test).to_parquet("y_test.parquet")

- Посмотрим на метаданные, полученные в результате обучения моделей:

In [69]:
# Preview the first five rows of the stacked training features.
meta_X.head(5)

Unnamed: 0,lgb_tuned300,lgb_tuned1000,lgb,rf,rf_tuned,catboost,catboost_tuned
0,0.614121,0.733302,0.676899,0.1,0.607497,0.608814,0.633948
1,0.134799,0.071703,0.244434,0.0,0.280421,0.104657,0.116567
2,0.031323,0.007531,0.016497,0.0,0.148217,0.021875,0.019381
3,0.222561,0.150005,0.283965,0.0,0.290147,0.20148,0.190227
4,0.191715,0.451358,0.146563,0.02,0.22805,0.314943,0.281575


In [80]:
# First five stacked-feature rows whose training target equals 1.
# y_train_final has a fresh 0..n-1 index, so its labels double as positions.
positive_train_rows = y_train_final[y_train_final == 1].index
meta_X.iloc[positive_train_rows].head(5)

Unnamed: 0,lgb_tuned300,lgb_tuned1000,lgb,rf,rf_tuned,catboost,catboost_tuned
4,0.191715,0.451358,0.146563,0.02,0.22805,0.314943,0.281575
30,0.157807,0.221399,0.292646,0.02,0.268412,0.210013,0.217367
76,0.937879,0.94477,0.869186,0.26,0.769966,0.923189,0.927501
95,0.314741,0.310477,0.324485,0.03,0.378032,0.293103,0.246419
110,0.970737,0.920344,0.927035,0.1,0.641188,0.961258,0.934133


In [81]:
# Re-index the test target to 0..n-1 so that its labels can be used as
# positions into meta_X_test, then show the first five positive rows.
y_test_reseted = y_test.reset_index()
positive_test_rows = y_test_reseted[y_test_reseted["target"] == 1].index
meta_X_test.iloc[positive_test_rows].head(5)

Unnamed: 0,lgb_tuned300,lgb_tuned1000,lgb,rf,rf_tuned,catboost,catboost_tuned
18,0.633078,0.818446,0.681663,0.06,0.527395,0.859125,0.78007
38,0.508345,0.380742,0.619478,0.02,0.373119,0.485434,0.359112
42,0.674342,0.770196,0.753307,0.07,0.359974,0.768853,0.643274
107,0.611763,0.750395,0.56707,0.06,0.598069,0.633726,0.5897
211,0.913964,0.936043,0.81806,0.24,0.743071,0.92418,0.859422


- Обучим финальную модель

In [82]:
def fit_final_model(meta_X: pd.DataFrame,
                    y_train: pd.Series,
                    meta_X_test: pd.DataFrame,
                    y_test: pd.Series,
                    columns: list[str] = None,
                    weeks_test: pd.Series = None):
    """
    Fit the logistic-regression meta model on base-model predictions.

    Parameters
    ----------
    meta_X : pd.DataFrame
        Base-model predictions on the training rows, one column per model.
    y_train : pd.Series
        Training target aligned row-by-row with ``meta_X``.
    meta_X_test : pd.DataFrame
        Base-model predictions on the test rows (same columns as ``meta_X``).
    y_test : pd.Series
        Test target aligned row-by-row with ``meta_X_test``.
    columns : list[str], optional
        Subset of base-model columns to stack. ``None`` uses every column of
        ``meta_X``.
    weeks_test : pd.Series, optional
        Group labels of the test rows, forwarded to ``get_metrics``. ``None``
        falls back to the notebook-level ``w_test`` (the previous behaviour).

    Returns
    -------
    tuple
        ``(final_clf, metrics)``: the fitted ``LogisticRegression`` and the
        object returned by ``get_metrics`` for the test predictions.

    Raises
    ------
    KeyError
        If a requested column is missing from ``meta_X`` or ``meta_X_test``.
    ValueError
        If features and target have different numbers of rows.
    """
    if columns is None:
        columns = list(meta_X.columns)
        name = "all"
    else:
        columns = list(columns)
        name = f"[{'|'.join(columns)}]"

    # Fail early with a readable message instead of deep inside pandas/sklearn.
    missing = [
        col for col in columns
        if col not in meta_X.columns or col not in meta_X_test.columns
    ]
    if missing:
        raise KeyError(f"Columns missing from meta features: {missing}")
    if len(meta_X) != len(y_train) or len(meta_X_test) != len(y_test):
        raise ValueError("Meta features and target must have the same length")

    if weeks_test is None:
        # Backward-compatible fallback to the global created by the split cell.
        weeks_test = w_test

    m_X = meta_X[columns]
    m_X_test = meta_X_test[columns]

    final_clf = LogisticRegression(random_state=RAND, class_weight='balanced')
    final_clf.fit(m_X, y_train)

    y_pred_final = final_clf.predict(m_X_test)
    y_proba_final = final_clf.predict_proba(m_X_test)

    # Train-side hard labels are not needed; only probabilities feed the
    # overfitting check.
    y_proba_train = final_clf.predict_proba(m_X)

    check_overfitting(y_train=y_train,
                      y_test=y_test,
                      metric_fun=roc_auc_score,
                      y_pred_train=y_proba_train[:, 1],
                      y_pred_test=y_proba_final[:, 1])

    metrics = get_metrics(y_test,
                          y_pred_final,
                          y_proba_final,
                          weeks_test,
                          name=f"StackingClassifier_{name}")
    return final_clf, metrics

In [90]:
# Stack every base model.
_, model_metrics = fit_final_model(meta_X=meta_X,
                                   y_train=y_train_final,
                                   meta_X_test=meta_X_test,
                                   y_test=y_test)
# Replace any row left by a previous run of this cell instead of appending a
# duplicate, so re-executing the cell does not grow the table.
metrics = pd.concat(
    [metrics[~metrics["model"].isin(model_metrics["model"])], model_metrics],
    ignore_index=True)
metrics.set_index('model').style.highlight_max(axis=0, color='lightblue')

roc_auc_score train: 0.855
roc_auc_score test: 0.851
delta = 0.5 %


Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
CatBoostClassifier_randseach_cv_1000e,0.791088,0.84803,0.11039,0.733194,0.191888,0.681672


- Посмотрим еще на другие сочетания моделей в стекинге 

In [68]:
meta_X.columns

Index(['lgb_tuned300', 'lgb_tuned1000', 'lgb', 'rf', 'rf_tuned', 'catboost',
       'catboost_tuned'],
      dtype='object')

In [91]:
# Stack the gradient-boosting models only (both RandomForest columns dropped).
columns = [
    'lgb_tuned300', 'lgb_tuned1000', 'lgb', 'catboost', 'catboost_tuned'
]
_, model_metrics = fit_final_model(meta_X=meta_X,
                                   y_train=y_train_final,
                                   meta_X_test=meta_X_test,
                                   y_test=y_test,
                                   columns=columns)
# Replace any row left by a previous run of this cell instead of appending a
# duplicate, so re-executing the cell does not grow the table.
metrics = pd.concat(
    [metrics[~metrics["model"].isin(model_metrics["model"])], model_metrics],
    ignore_index=True)
metrics.set_index('model').style.highlight_max(axis=0, color='lightblue')

roc_auc_score train: 0.855
roc_auc_score test: 0.850
delta = 0.5 %


Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
CatBoostClassifier_randseach_cv_1000e,0.791088,0.84803,0.11039,0.733194,0.191888,0.681672


In [92]:
# Stack the two tuned LightGBM models with both CatBoost models.
columns = [
    'lgb_tuned300', 'lgb_tuned1000', 'catboost', 'catboost_tuned'
]
_, model_metrics = fit_final_model(meta_X=meta_X,
                                   y_train=y_train_final,
                                   meta_X_test=meta_X_test,
                                   y_test=y_test,
                                   columns=columns)
# Replace any row left by a previous run of this cell instead of appending a
# duplicate, so re-executing the cell does not grow the table.
metrics = pd.concat(
    [metrics[~metrics["model"].isin(model_metrics["model"])], model_metrics],
    ignore_index=True)
metrics.set_index('model').style.highlight_max(axis=0, color='lightblue')

roc_auc_score train: 0.855
roc_auc_score test: 0.850
delta = 0.6 %


Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
CatBoostClassifier_randseach_cv_1000e,0.791088,0.84803,0.11039,0.733194,0.191888,0.681672


In [93]:
# Stack the three LightGBM models only.
columns = [
    'lgb_tuned300', 'lgb_tuned1000', 'lgb'
]
_, model_metrics = fit_final_model(meta_X=meta_X,
                                   y_train=y_train_final,
                                   meta_X_test=meta_X_test,
                                   y_test=y_test,
                                   columns=columns)
# Replace any row left by a previous run of this cell instead of appending a
# duplicate, so re-executing the cell does not grow the table.
metrics = pd.concat(
    [metrics[~metrics["model"].isin(model_metrics["model"])], model_metrics],
    ignore_index=True)
metrics.set_index('model').style.highlight_max(axis=0, color='lightblue')

roc_auc_score train: 0.853
roc_auc_score test: 0.848
delta = 0.6 %


Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
CatBoostClassifier_randseach_cv_1000e,0.791088,0.84803,0.11039,0.733194,0.191888,0.681672


In [94]:
# Stack the three LightGBM models with the default CatBoost model.
columns = [
    'lgb_tuned300', 'lgb_tuned1000', 'lgb', 'catboost'
]
_, model_metrics = fit_final_model(meta_X=meta_X,
                                   y_train=y_train_final,
                                   meta_X_test=meta_X_test,
                                   y_test=y_test,
                                   columns=columns)
# Replace any row left by a previous run of this cell instead of appending a
# duplicate, so re-executing the cell does not grow the table.
metrics = pd.concat(
    [metrics[~metrics["model"].isin(model_metrics["model"])], model_metrics],
    ignore_index=True)
metrics.set_index('model').style.highlight_max(axis=0, color='lightblue')

roc_auc_score train: 0.854
roc_auc_score test: 0.849
delta = 0.6 %


Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
CatBoostClassifier_randseach_cv_1000e,0.791088,0.84803,0.11039,0.733194,0.191888,0.681672


# Summary

In [95]:
# Order the models by gini_stability (ascending, so the highest score is in
# the last row) and highlight the column-wise maxima.
metrics_by_stability = metrics.sort_values(by="gini_stability")
metrics_by_stability.set_index('model').style.highlight_max(axis=0,
                                                            color='lightblue')

Unnamed: 0_level_0,Accuracy,ROC_AUC,Precision,Recall,f1,gini_stability
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RandomForestClassifier_baseline_test,0.966177,0.79039,0.75,0.000286,0.000571,0.560007
RandomForestClassifier_optuna_cv,0.861642,0.805039,0.121088,0.493715,0.194479,0.596975
LGBMClassifier_baseline_test,0.72993,0.834365,0.091831,0.785565,0.164439,0.652661
LGBMClassifier_optuna_300e,0.759817,0.839717,0.099464,0.75738,0.175835,0.662517
CatBoostClassifier_baseline_test,0.773091,0.842445,0.103959,0.749095,0.18258,0.669241
LGBMClassifier_optuna_1000e,0.759775,0.843362,0.100686,0.769187,0.178063,0.671506
CatBoostClassifier_randsearch_1000e,0.805107,0.844596,0.114588,0.70777,0.197243,0.674262
LGBMClassifier_optuna_cv_300e,0.76202,0.846057,0.101878,0.772139,0.180006,0.676819
LGBMClassifier_optuna_1000e_cv,0.762532,0.84786,0.102632,0.777376,0.181326,0.681118
StackingClassifier_[lgb_tuned300|lgb_tuned1000|lgb],0.751087,0.848062,0.09944,0.789183,0.176624,0.681543


- Лучший результат по gini_stability показывает стекинг всех моделей, но это самый долгий в обучении вариант. Другие вариации стекинга чуть ниже по качеству.
- Дальше по качеству идет CatBoostClassifier с подобранными параметрами и кросс-валидацией и стекинг из 3 моделей LGBMClassifier. Но CatBoostClassifier в целом тоже долго обучается и выдает ответ.
- Оптимальный вариант по соотношению качества и скорости обучения/предсказания — LGBMClassifier с подобранными параметрами для 1000 estimators с усреднением оценки с помощью cross-validation.

In [96]:
# Persist the combined metrics table (tuning results plus stacking results).
metrics.to_csv("final_metrics.csv", index=False)