In [1]:
# Тюнинг
import optuna as opt
from catboost import CatBoostClassifier
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

# Данные
import os
import pandas as pd

In [2]:
# Пути
ROOT = os.getcwd()
TRAIN_DATASET = os.path.join(ROOT, '../data/train_AIC.csv')
TEST_DATASET = os.path.join(ROOT, '../data/test_AIC.csv')

# Загрузка
train_df = pd.read_csv(TRAIN_DATASET)
test_df = pd.read_csv(TEST_DATASET)

X, y = train_df.iloc[:, :-1], train_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [21]:
# Функция оптимизации
def objective(trial: opt.Trial):
    # Параметры
    learning_rate = trial.suggest_float('learning_rate', 0.1, 0.7, log=True)
    n_estimators = trial.suggest_int('n_estimators', 100, 1000, 50)
    max_depth = trial.suggest_int('max_depth', 4, 12)
    max_bin = trial.suggest_int('max_bin', 32, 128)
    l2_leaf_reg = trial.suggest_float('l2_reg', 0.1, 1, log=True)

    # Модель
    model = CatBoostClassifier(
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        max_depth=max_depth,
        l2_leaf_reg=l2_leaf_reg,
        cat_features=['Поставщик']
        )

    fit_params = {
        'plot': True
    }

    cv_score = cross_val_score(model, X_train, y_train, cv=StratifiedKFold(), scoring='f1_macro', n_jobs=-1, fit_params=fit_params)
    accuracy = cv_score.mean()
    return accuracy

In [None]:
study = opt.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

In [44]:
model = CatBoostClassifier(
        learning_rate=0.3,
        n_estimators=600,
        max_depth=8,
        l2_leaf_reg=0.2,
        cat_features=[
            'Поставщик', 'Материал', 'Категорийный менеджер', 'Операционный менеджер',
            'Завод', 'Закупочная организация', 'Группа закупок', 'Балансовая единица',
            'ЕИ', 'Вариант поставки'
            ],
        custom_metric=['Recall', 'F1', 'AUC']
        )

model.fit(X_train, y_train, plot=True)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 0.5127810	total: 854ms	remaining: 8m 31s
1:	learn: 0.4128842	total: 1.59s	remaining: 7m 55s
2:	learn: 0.3588441	total: 2.75s	remaining: 9m 7s
3:	learn: 0.3325485	total: 3.61s	remaining: 8m 57s
4:	learn: 0.3198872	total: 4.53s	remaining: 8m 59s
5:	learn: 0.3120220	total: 5.17s	remaining: 8m 32s
6:	learn: 0.3044522	total: 6.08s	remaining: 8m 34s
7:	learn: 0.2973740	total: 6.82s	remaining: 8m 24s
8:	learn: 0.2930820	total: 7.63s	remaining: 8m 21s
9:	learn: 0.2871655	total: 8.5s	remaining: 8m 21s
10:	learn: 0.2834852	total: 9.47s	remaining: 8m 27s
11:	learn: 0.2781857	total: 10.3s	remaining: 8m 24s
12:	learn: 0.2741076	total: 11.1s	remaining: 8m 20s
13:	learn: 0.2722376	total: 11.9s	remaining: 8m 17s
14:	learn: 0.2705211	total: 12.7s	remaining: 8m 16s
15:	learn: 0.2674921	total: 13.6s	remaining: 8m 16s
16:	learn: 0.2628445	total: 14.3s	remaining: 8m 11s
17:	learn: 0.2608811	total: 15.1s	remaining: 8m 8s
18:	learn: 0.2581382	total: 16.1s	remaining: 8m 13s
19:	learn: 0.2559319	tota

<catboost.core.CatBoostClassifier at 0x22668e34650>

In [47]:
f1_score(y_test, model.predict(X_test))

0.7853279242731576

In [46]:
cv_score = cross_val_score(model, X_test, y_test, cv=StratifiedKFold(), n_jobs=-1)

RuntimeError: Cannot clone object <catboost.core.CatBoostClassifier object at 0x0000022668E34650>, as the constructor either does not set or modifies parameter custom_metric