In [4]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, StratifiedKFold
import optuna

# Load the data; the Iris dataset is used here as the example.
data = load_iris()
X = data.data    # feature matrix
y = data.target  # class labels

# Objective function that Optuna optimizes
def objective(trial):
    """Score one random-forest configuration proposed by ``trial``.

    Hyperparameters are drawn from ``trial``, a ``RandomForestClassifier``
    is built from them, and it is evaluated on the module-level ``X`` / ``y``
    with 5-fold stratified cross-validation.

    Args:
        trial: Object providing ``suggest_int`` / ``suggest_categorical``
            (an Optuna trial when called through ``study.optimize``).

    Returns:
        Mean cross-validated accuracy over the 5 folds.
    """
    # Search space. Dict entries are evaluated top to bottom, so the
    # parameters are requested from the trial in the order written here.
    forest_params = {
        'n_estimators': trial.suggest_int('n_estimators', 10, 200),
        'max_depth': trial.suggest_int('max_depth', 2, 32),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
        'max_features': trial.suggest_categorical('max_features', ['log2', 'sqrt']),
    }

    # Fixed random_state so that score differences between trials come from
    # the hyperparameters rather than from the forest's own randomness.
    model = RandomForestClassifier(**forest_params, random_state=42)

    # Shuffled, stratified 5-fold split; the same seed is used for every trial.
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    fold_accuracies = cross_val_score(model, X, y, cv=folds, scoring='accuracy')

    # The mean accuracy across folds is the value being optimized.
    return np.mean(fold_accuracies)

# Create the Optuna study. The sampler is seeded so that repeated runs
# (e.g. Restart Kernel -> Run All) propose the same sequence of trials;
# without a seed the best trial changes from run to run even though the
# model and the CV splitter are seeded. TPESampler is Optuna's default
# sampler, so only the seeding is new.
study = optuna.create_study(
    direction='maximize',  # maximize accuracy
    sampler=optuna.samplers.TPESampler(seed=42),
)

# Run the hyperparameter search
study.optimize(objective, n_trials=100)  # run 100 trials

# Report the best trial and its hyperparameters
print("Best trial:")
trial_best = study.best_trial
print("  Value: {:.4f}".format(trial_best.value))
print("  Params: ")
for key, value in trial_best.params.items():
    print("    {}: {}".format(key, value))

[I 2024-06-18 02:47:25,494] A new study created in memory with name: no-name-0c27f655-be24-472d-89f9-4ecf30703972
[I 2024-06-18 02:47:26,671] Trial 0 finished with value: 0.9533333333333334 and parameters: {'n_estimators': 155, 'max_depth': 2, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 0 with value: 0.9533333333333334.
[I 2024-06-18 02:47:27,205] Trial 1 finished with value: 0.9600000000000002 and parameters: {'n_estimators': 78, 'max_depth': 4, 'min_samples_split': 2, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9600000000000002.
[I 2024-06-18 02:47:28,188] Trial 2 finished with value: 0.9533333333333334 and parameters: {'n_estimators': 158, 'max_depth': 23, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 1 with value: 0.9600000000000002.
[I 2024-06-18 02:47:28,684] Trial 3 finished with value: 0.9600000000000002 and parameters: {'n_estimators': 77, 'max_depth': 16, 'min_

Best trial:
  Value: 0.9667
  Params: 
    n_estimators: 122
    max_depth: 24
    min_samples_split: 6
    min_samples_leaf: 1
    max_features: log2


In [2]:
!pip install optuna -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting optuna
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/15/da/68883911855d8b4d521f9a370e4e6aab8232b91c1d8d5a8348c4680c6642/optuna-3.6.1-py3-none-any.whl (380 kB)
     ---------------------------------------- 0.0/380.1 kB ? eta -:--:--
     ------------------------------------  378.9/380.1 kB 11.5 MB/s eta 0:00:01
     ------------------------------------  378.9/380.1 kB 11.5 MB/s eta 0:00:01
     -------------------------------------- 380.1/380.1 kB 3.4 MB/s eta 0:00:00
Collecting alembic>=1.5.0 (from optuna)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/7f/50/9fb3a5c80df6eb6516693270621676980acd6d5a9a7efdbfa273f8d616c7/alembic-1.13.1-py3-none-any.whl (233 kB)
     ---------------------------------------- 0.0/233.4 kB ? eta -:--:--
     ----------------------------------- - 225.3/233.4 kB 13.4 MB/s eta 0:00:01
     ----------------------------------- - 225.3/233.4 kB 13.4 MB/s eta 0:00:01
     --