In [1]:
# 以鸢尾花数据集为例, 演示如何使用optuna
import optuna
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score

In [2]:
# Notebook-wide random seed; passed as `random_state` to the random forests
# and to the StratifiedKFold splitter defined in later cells.
SEED = 42

In [3]:
# Load the iris dataset and hold out 20% of the samples as a test split.
# The split is seeded with the notebook-wide SEED rather than a separate
# literal, so every source of randomness is controlled by one constant.
data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=SEED
)

In [4]:
# 定义优化目标函数
def objective(trial):
    """Optuna objective: fit a random forest and score it on the held-out split.

    Samples ``n_estimators`` (10-100) and ``max_depth`` (3-10) from the trial,
    trains a ``RandomForestClassifier`` on ``X_train``/``y_train`` and returns
    the weighted F1 score of its predictions on ``X_test``.

    Note: the score is computed on ``X_test``/``y_test``, so the selected
    hyperparameters are tuned against that same split.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Weighted F1 score of the fitted model on the test split.
    """
    # Sample the search space (same order as the parameter names are registered).
    n_estimators = trial.suggest_int('n_estimators', 10, 100)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    # Build and train the classifier with a fixed seed.
    clf = RandomForestClassifier(
        random_state=SEED,
        n_estimators=n_estimators,
        max_depth=max_depth,
    )
    clf.fit(X_train, y_train)

    # Score the predictions with weighted F1 (not accuracy).
    predictions = clf.predict(X_test)
    return f1_score(y_test, predictions, average='weighted')

# Create the study with an explicitly seeded sampler so that the sequence of
# suggested hyperparameters (and therefore the reported best trial) is
# reproducible on Restart & Run All; an unseeded sampler varies between runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=SEED),
)
study.optimize(objective, n_trials=100)

# Report the best hyperparameters found and the score they achieved.
print(f"Best hyperparameters: {study.best_params}")
print(f"Best f1: {study.best_value:.4f}")


[I 2025-01-16 17:12:57,113] A new study created in memory with name: no-name-9a02246a-403b-48f0-9c6f-454e75c3fa7d
[I 2025-01-16 17:12:57,152] Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 37, 'max_depth': 4}. Best is trial 0 with value: 1.0.
[I 2025-01-16 17:12:57,178] Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 35, 'max_depth': 5}. Best is trial 0 with value: 1.0.
[I 2025-01-16 17:12:57,204] Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 34, 'max_depth': 6}. Best is trial 0 with value: 1.0.
[I 2025-01-16 17:12:57,263] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 96, 'max_depth': 8}. Best is trial 0 with value: 1.0.
[I 2025-01-16 17:12:57,324] Trial 4 finished with value: 1.0 and parameters: {'n_estimators': 82, 'max_depth': 3}. Best is trial 0 with value: 1.0.
[I 2025-01-16 17:12:57,356] Trial 5 finished with value: 1.0 and parameters: {'n_estimators': 42, 'max_depth': 3}. Best is trial 0 with value: 1.0

Best hyperparameters: {'n_estimators': 37, 'max_depth': 4}
Best f1: 1.0000


大多数情况下，我们会使用交叉验证来评估模型的性能，并以此选出最佳的超参数。下面是使用交叉验证进行优化的、更为全面的完整代码。

In [5]:
from sklearn.model_selection import cross_val_score, StratifiedKFold

import numpy as np

In [6]:
def n_cross_validata(model,data):
    """Return the mean macro-F1 of ``model`` under 5-fold stratified CV.

    Args:
        model: Estimator passed to ``cross_val_score``.
        data: Object exposing ``data`` (features) and ``target`` (labels).

    Returns:
        Mean of the per-fold ``f1_macro`` scores.
    """
    # Stratified, shuffled folds keep the class proportions similar in every
    # fold; the splitter is seeded with SEED.
    splitter = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

    # One macro-F1 score per fold, averaged into a single number.
    fold_scores = cross_val_score(
        model,
        data.data,
        data.target,
        cv=splitter,
        scoring='f1_macro',
    )
    return np.mean(fold_scores)


def CV_objective(trial,data):
    """Optuna objective scored by cross-validation.

    Samples ``n_estimators`` (10-100) and ``max_depth`` (3-10) from the trial,
    builds a ``RandomForestClassifier`` seeded with SEED and returns the score
    computed by ``n_cross_validata`` for that model on ``data``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        data: Dataset object forwarded unchanged to ``n_cross_validata``.

    Returns:
        The cross-validation score returned by ``n_cross_validata``.
    """
    # Sample the hyperparameters for this trial.
    n_estimators = trial.suggest_int('n_estimators', 10, 100)
    max_depth = trial.suggest_int('max_depth', 3, 10)

    # Unfitted model; fitting happens inside the cross-validation helper.
    forest = RandomForestClassifier(
        random_state=SEED,
        n_estimators=n_estimators,
        max_depth=max_depth,
    )

    return n_cross_validata(forest, data)


def run_optimization(data, n_trials=100):
    """Search random-forest hyperparameters with Optuna, scored by ``CV_objective``.

    Args:
        data: Dataset object forwarded to ``CV_objective`` for every trial.
        n_trials: Number of trials to run. Defaults to 100, the value that was
            previously hard-coded.

    Returns:
        dict: The best hyperparameters found (``study.best_params``).
    """
    # Seed the sampler so repeated runs suggest the same hyperparameters and
    # report the same best trial; an unseeded sampler varies between runs.
    sampler = optuna.samplers.TPESampler(seed=SEED)
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(lambda trial: CV_objective(trial, data), n_trials=n_trials)

    print(f"Best params: {study.best_params}")
    print(f"Best score: {study.best_value}")
    return study.best_params

In [7]:
# Run the cross-validated search on the loaded dataset and keep the best
# hyperparameter dict that run_optimization returns.
params = run_optimization(data)

[I 2025-01-16 17:13:01,736] A new study created in memory with name: no-name-803f9831-9446-4d13-8ba0-022b56a55106
[I 2025-01-16 17:13:02,006] Trial 0 finished with value: 0.9597984861142755 and parameters: {'n_estimators': 75, 'max_depth': 3}. Best is trial 0 with value: 0.9597984861142755.
[I 2025-01-16 17:13:02,210] Trial 1 finished with value: 0.9531151110098477 and parameters: {'n_estimators': 53, 'max_depth': 6}. Best is trial 0 with value: 0.9597984861142755.
[I 2025-01-16 17:13:02,560] Trial 2 finished with value: 0.9464317359054201 and parameters: {'n_estimators': 96, 'max_depth': 7}. Best is trial 0 with value: 0.9597984861142755.
[I 2025-01-16 17:13:02,718] Trial 3 finished with value: 0.9531151110098477 and parameters: {'n_estimators': 45, 'max_depth': 9}. Best is trial 0 with value: 0.9597984861142755.
[I 2025-01-16 17:13:03,004] Trial 4 finished with value: 0.9464317359054201 and parameters: {'n_estimators': 77, 'max_depth': 6}. Best is trial 0 with value: 0.95979848611427

Best params: {'n_estimators': 80, 'max_depth': 3}
Best score: 0.9665152780942254
