# Лабораторная работа 5.3

Подбор гиперпараметров XGBoost с использованием RandomizedSearchCV и Hyperopt (TPE)

In [2]:
import pandas as pd
import numpy as np
import time
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import f1_score
from scipy.stats import uniform, randint
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK

In [3]:
df = pd.read_csv("diabetes.csv")
X = df.drop("Outcome", axis=1)
y = df["Outcome"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

## 1. Подбор с использованием RandomizedSearchCV

In [4]:
param_dist = {
    'n_estimators': randint(50, 200),
    'max_depth': randint(3, 10),
    'learning_rate': uniform(0.01, 0.3),
    'subsample': uniform(0.5, 0.5),
    'colsample_bytree': uniform(0.5, 0.5),
    'reg_alpha': uniform(0, 1),
    'reg_lambda': uniform(0, 1)
}

xgb = XGBClassifier(eval_metric='logloss')
start = time.time()
random_search = RandomizedSearchCV(xgb, param_distributions=param_dist, n_iter=20, scoring='f1', cv=3, random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)
random_time = time.time() - start

best_model_random = random_search.best_estimator_
y_pred_random = best_model_random.predict(X_test)
f1_random = f1_score(y_test, y_pred_random)
print(f"RandomizedSearchCV F1-score: {f1_random:.4f}, Time: {random_time:.2f}s")
print("Best Params:", random_search.best_params_)

RandomizedSearchCV F1-score: 0.5874, Time: 3.66s
Best Params: {'colsample_bytree': np.float64(0.6448757264568841), 'learning_rate': np.float64(0.058366386176201324), 'max_depth': 4, 'n_estimators': 94, 'reg_alpha': np.float64(0.295633685837714), 'reg_lambda': np.float64(0.10549425983027061), 'subsample': np.float64(0.7282672852414551)}


## 2. Подбор с использованием Hyperopt (TPE)

In [5]:

# Поиск по гиперпараметрам
space = {
    'n_estimators': hp.quniform('n_estimators', 50, 200, 1),
    'max_depth': hp.quniform('max_depth', 3, 10, 1),
    'learning_rate': hp.uniform('learning_rate', 0.01, 0.3),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
    'reg_alpha': hp.uniform('reg_alpha', 0, 1.0),
    'reg_lambda': hp.uniform('reg_lambda', 0, 1.0)
}

# Целевая функция для оптимизации
def objective(params):
    model = XGBClassifier(
        eval_metric='logloss',
        n_estimators=int(params['n_estimators']),
        max_depth=int(params['max_depth']),
        learning_rate=params['learning_rate'],
        subsample=params['subsample'],
        colsample_bytree=params['colsample_bytree'],
        reg_alpha=params['reg_alpha'],
        reg_lambda=params['reg_lambda']
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    score = f1_score(y_test, y_pred)
    return {'loss': -score, 'status': STATUS_OK}

# Запуск TPE оптимизации
start = time.time()
trials = Trials()

# Используем default_rng — работает корректно с Hyperopt
from numpy.random import default_rng
rng = default_rng(42)

best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=20, trials=trials, rstate=rng)

tpe_time = time.time() - start

print("Best Hyperopt Params:", best)
print(f"TPE Optimization Time: {tpe_time:.2f}s")


100%|██████████████████████████████████████████████████████████████████████| 20/20 [00:01<00:00, 16.12trial/s, best loss: -0.6225165562913907]
Best Hyperopt Params: {'colsample_bytree': np.float64(0.7862468704907701), 'learning_rate': np.float64(0.05995859136138078), 'max_depth': np.float64(8.0), 'n_estimators': np.float64(182.0), 'reg_alpha': np.float64(0.08885184806583746), 'reg_lambda': np.float64(0.4671485658354062), 'subsample': np.float64(0.5337114618970547)}
TPE Optimization Time: 1.25s
