In [8]:
# Тюнинг
import optuna as opt
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.metrics import f1_score

# Данные
import os
import pandas as pd

In [2]:
# Пути
ROOT = os.getcwd()
TRAIN_DATASET = os.path.join(ROOT, '../data/train_AIC.csv')
TEST_DATASET = os.path.join(ROOT, '../data/test_AIC.csv')

# Загрузка
train_df = pd.read_csv(TRAIN_DATASET)
test_df = pd.read_csv(TEST_DATASET)

X, y = train_df.iloc[:, :-1], train_df.iloc[:, -1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [6]:
# Функция оптимизации
def objective(trial: opt.Trial):
    # Параметры
    learning_rate = trial.suggest_float('learning_rate', 0.1, 0.7, log=True)
    n_estimators = trial.suggest_int('n_estimators', 100, 700, 50)
    max_depth = trial.suggest_int('max_depth', 4, 10)
    max_bin = trial.suggest_int('max_bin', 32, 80)
    l2_leaf_reg = trial.suggest_float('l2_reg', 0.1, 0.7, log=True)

    # Модель
    model = CatBoostClassifier(
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        max_depth=max_depth,
        l2_leaf_reg=l2_leaf_reg,
        cat_features=[
            'Поставщик', 'Материал', 'Категорийный менеджер', 'Операционный менеджер',
            'Завод', 'Закупочная организация', 'Группа закупок', 'Балансовая единица',
            'ЕИ', 'Вариант поставки'
            ]
        )

    cv_score = cross_val_score(model, X_train, y_train, cv=StratifiedKFold(), scoring='f1_macro', n_jobs=-1)
    accuracy = cv_score.mean()
    return accuracy

In [33]:
X_train_lgbm, X_test_lgbm = X_train.copy(), X_test.copy()
new_features = [num for num in range(0, 43)]
X_train_lgbm.columns, X_test_lgbm.columns = new_features, new_features

# Функция оптимизации
def objective_lgbm(trial: opt.Trial):
    # Параметры
    learning_rate = trial.suggest_float('learning_rate', 0.1, 0.7, log=True)
    n_estimators = trial.suggest_int('n_estimators', 300, 1200, 50)
    max_depth = trial.suggest_int('max_depth', 6, 16)
    max_bin = trial.suggest_int('max_bin', 64, 256),
    num_leaves = trial.suggest_int('num_leaves', 64, 512)
    reg_lambda = trial.suggest_float('l2_reg', 0.1, 0.7, log=True)

    # Модель
    model = LGBMClassifier(
        learning_rate=learning_rate,
        n_estimators=n_estimators,
        max_depth=max_depth,
        reg_lambda=reg_lambda,
        max_bin=max_bin,
        n_jobs=-1,
        force_col_wise=True,
        verbose=-1
        )

    model.fit(X_train_lgbm, y_train)
    # cv_score = cross_val_score(model, X_train_lgbm, y_train, cv=StratifiedKFold(), scoring='f1_macro', n_jobs=-1)
    # accuracy = cv_score.mean()

    accuracy = f1_score(y_test, model.predict(X_test_lgbm), average='macro')
    return accuracy

In [34]:
study = opt.create_study(direction='maximize')
study.optimize(objective_lgbm, n_trials=50)

[I 2023-08-07 00:34:05,788] A new study created in memory with name: no-name-fce1b302-65bc-48c0-a18f-ffd3dd5ccc86
[I 2023-08-07 00:34:12,772] Trial 0 finished with value: 0.8624893254525894 and parameters: {'learning_rate': 0.43972569737536354, 'n_estimators': 450, 'max_depth': 8, 'max_bin': 93, 'num_leaves': 319, 'l2_reg': 0.5010953630019842}. Best is trial 0 with value: 0.8624893254525894.
[I 2023-08-07 00:34:28,936] Trial 1 finished with value: 0.8822087233648749 and parameters: {'learning_rate': 0.4115480364583243, 'n_estimators': 1100, 'max_depth': 9, 'max_bin': 203, 'num_leaves': 351, 'l2_reg': 0.6407752586348276}. Best is trial 1 with value: 0.8822087233648749.
[I 2023-08-07 00:34:43,190] Trial 2 finished with value: 0.8804783415117436 and parameters: {'learning_rate': 0.45974561829606764, 'n_estimators': 850, 'max_depth': 11, 'max_bin': 185, 'num_leaves': 264, 'l2_reg': 0.22464374632665596}. Best is trial 1 with value: 0.8822087233648749.
[I 2023-08-07 00:35:02,261] Trial 3 fin

In [None]:
# model = CatBoostClassifier(
#         learning_rate=0.3,
#         n_estimators=600,
#         max_depth=8,
#         l2_leaf_reg=0.2,
#         cat_features=[
#             'Поставщик', 'Материал', 'Категорийный менеджер', 'Операционный менеджер',
#             'Завод', 'Закупочная организация', 'Группа закупок', 'Балансовая единица',
#             'ЕИ', 'Вариант поставки'
#             ],
#         custom_metric=['Recall', 'F1']
#         )

# model.fit(X_train, y_train, plot=True)

In [47]:
f1_score(y_test, model.predict(X_test))

0.7853279242731576

In [46]:
cv_score = cross_val_score(model, X_test, y_test, cv=StratifiedKFold(), n_jobs=-1)

RuntimeError: Cannot clone object <catboost.core.CatBoostClassifier object at 0x0000022668E34650>, as the constructor either does not set or modifies parameter custom_metric