In [2]:
import pandas as pd 
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
df = pd.read_excel('testing.xlsx')

In [6]:
X = df[['department_id', 'name', 'salary']]

In [7]:
y = df['chief_id']

In [12]:
import optuna
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

# Генерация или загрузка данных
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()
X = data.data
y = data.target

# Разделение на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Определение функции для оптимизации
def objective(trial):
    params = {
        'iterations': trial.suggest_int('iterations', 100, 1000),
        'depth': trial.suggest_int('depth', 4, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.3),
        'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-3, 10),
        'random_strength': trial.suggest_uniform('random_strength', 1e-3, 10),
        'border_count': trial.suggest_int('border_count', 32, 255),
        'boosting_type': trial.suggest_categorical('boosting_type', ['Plain', 'Ordered']),
        'bootstrap_type': trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli', 'MVS', 'No']),
        'task_type': 'CPU',  # Укажите 'GPU', если у вас есть поддержка GPU
        'verbose': 0
    }

    # Условное добавление параметров, связанных с бутстрэпингом
    if params['bootstrap_type'] == 'Bayesian':
        params['bagging_temperature'] = trial.suggest_uniform('bagging_temperature_bayesian', 0.0, 1.0)
    elif params['bootstrap_type'] == 'Bernoulli':
        params['subsample'] = trial.suggest_uniform('subsample', 0.5, 1.0)
    elif params['bootstrap_type'] == 'MVS':
        pass  # Нет дополнительных параметров для MVS
    elif params['bootstrap_type'] == 'No':
        # Убедимся, что никакие бутстрэп-параметры не добавлены
        params = {key: value for key, value in params.items() if key not in ['bagging_temperature', 'subsample']}
    
    model = CatBoostClassifier(**params)
    model.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=50, verbose=0)
    
    preds = model.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    return accuracy

# Создание и запуск исследования
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

# Вывод лучших параметров и метрики
print("Best parameters:", study.best_params)
print("Best accuracy:", study.best_value)

# Обучение лучшей модели
best_params = study.best_params
best_model = CatBoostClassifier(**best_params)
best_model.fit(X_train, y_train)

# Оценка на тестовой выборке
final_preds = best_model.predict(X_test)
final_accuracy = accuracy_score(y_test, final_preds)
print("Final accuracy on test data:", final_accuracy)


[I 2024-12-09 14:41:16,569] A new study created in memory with name: no-name-91dfe53c-ec43-4789-9b95-e5c179268568
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.3),
  'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-3, 10),
  'random_strength': trial.suggest_uniform('random_strength', 1e-3, 10),
  params['subsample'] = trial.suggest_uniform('subsample', 0.5, 1.0)
[I 2024-12-09 14:41:56,967] Trial 0 finished with value: 0.9736842105263158 and parameters: {'iterations': 427, 'depth': 9, 'learning_rate': 0.010019957981435776, 'l2_leaf_reg': 0.1381774697935787, 'random_strength': 1.3994891707627002, 'border_count': 80, 'boosting_type': 'Ordered', 'bootstrap_type': 'Bernoulli', 'subsample': 0.8844249640706614}. Best is trial 0 with value: 0.9736842105263158.
  'learning_rate': trial.suggest_loguniform('learning_rate', 1e-3, 0.3),
  'l2_leaf_reg': trial.suggest_loguniform('l2_leaf_reg', 1e-3, 10),
  'random_strength': trial.suggest_uniform('random_strength', 1e

Best parameters: {'iterations': 484, 'depth': 8, 'learning_rate': 0.08858296680960444, 'l2_leaf_reg': 0.0012955333057395107, 'random_strength': 8.812524987976827, 'border_count': 63, 'boosting_type': 'Ordered', 'bootstrap_type': 'No'}
Best accuracy: 0.9912280701754386
0:	learn: 0.4970805	total: 38.3ms	remaining: 18.5s
1:	learn: 0.3704834	total: 78.1ms	remaining: 18.8s
2:	learn: 0.2930217	total: 121ms	remaining: 19.4s
3:	learn: 0.2206379	total: 161ms	remaining: 19.3s
4:	learn: 0.1654539	total: 203ms	remaining: 19.4s
5:	learn: 0.1402188	total: 247ms	remaining: 19.7s
6:	learn: 0.1113341	total: 299ms	remaining: 20.4s
7:	learn: 0.0806344	total: 341ms	remaining: 20.3s
8:	learn: 0.0723221	total: 385ms	remaining: 20.3s
9:	learn: 0.0636220	total: 431ms	remaining: 20.4s
10:	learn: 0.0513643	total: 479ms	remaining: 20.6s
11:	learn: 0.0461922	total: 520ms	remaining: 20.5s
12:	learn: 0.0408404	total: 563ms	remaining: 20.4s
13:	learn: 0.0373611	total: 609ms	remaining: 20.5s
14:	learn: 0.0346923	tota