# 1. 导入模块和库

In [1]:
import time
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.signal import savgol_filter
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, cohen_kappa_score, confusion_matrix, classification_report

  from .autonotebook import tqdm as notebook_tqdm


# 2. 导入数据+PCA

In [2]:
data = pd.read_csv(r"F:\Code_Data\2_Rocks_Spectrum_Reflectance_essay_30_origin_modify_2.csv",encoding='utf-8') #encoding='GBK',防止中文乱码
X = data.iloc[:,1:-1]
y = data.iloc[:,-1]
X_SG = savgol_filter(X, 5, 2)
X_SG_mms = MinMaxScaler().fit_transform(X_SG)
Label = LabelEncoder().fit_transform(y)
data.iloc[:,1:-1] = X_SG_mms
data.iloc[:, -1] = Label
X = data.iloc[:,1:-1]
y = data.iloc[:,-1]
X.shape, y.shape

X_dr = PCA(29).fit_transform(X)

# 3. 遗传算法

In [3]:
def optuna_objective(trial):

    #定义参数空间
    n_estimators = trial.suggest_int("n_estimators",50,150,1) #整数型，(参数名称，下界，上界，步长)
    max_depth = trial.suggest_int("max_depth",30,50,1)
    max_features = trial.suggest_int("max_features",5,30,1)
    min_samples_leaf = trial.suggest_int("min_samples_leaf",1,51,1)
    min_samples_split = trial.suggest_int("min_samples_split",2,22,1)

    #定义评估器
    #需要优化的参数由上述参数空间决定
    #不需要优化的参数则直接填写具体值
    rfc = RFC(n_estimators = n_estimators
              ,max_depth = max_depth
              ,max_features = max_features
              ,min_samples_leaf = min_samples_leaf
              ,min_samples_split = min_samples_split
              ,random_state=0
              ,verbose=False
              ,n_jobs=-1
             )

    #交叉验证过程，输出准确率(accuracy)
    cv = KFold(n_splits=5,shuffle=True,random_state=0)
    score = cross_val_score(rfc,X_dr,y
                            ,scoring='accuracy'
                            ,cv=cv #交叉验证模式
                            ,verbose=False #是否打印进程
                            ,n_jobs=-1 #线程数
                            ,error_score='raise' #如果在某个折叠中出现错误，会引发一个异常
                           )
    #最终输出平均准确率
    return np.mean(score)

In [4]:
def optimizer_optuna(n_trials, algo):

    #定义使用随机网格搜索
    sampler = optuna.samplers.NSGAIISampler(population_size=50  # int，种群大小，即每代中包含的个体数量。默认 50
                                           ,mutation_prob=1.0 # float，变异概率，指定了在交叉之后每个个体发生变异的概率。默认 1.0。
                                           ,crossover_prob=1.0  # float，交叉概率，指定了在每次交叉操作中，一个个体与另一个个体产生子代的概率。默认 1.0
                                           #,crossover_operator  # str，交叉操作符的类型。可选 "sbx"（模拟二进制交叉）和 "ux"（单点均匀交叉）。默认"sbx"
                                           #,mutation_operator  # str，变异操作符的类型。可选 "polynomial"（多项式变异）和 "random"（随机变异）。默认"polynomial"
                                           #,eta= # float，多项式变异操作的参数，用于控制变异的强度。较高的值表示更强的变异。默认20.0。
                                           #,crossover_eta= # float，模拟二进制交叉操作的参数，用于控制交叉的强度。较低的值表示更强的交叉。默认20.0。
                                           #,max_generations=  # int，最大的迭代代数。默认值为 None，表示没有最大代数限制。
                                           )

    #实际优化过程，首先实例化优化器
    study = optuna.create_study(sampler = sampler #要使用的具体算法
                                ,direction="maximize" #优化的方向，可以填写minimize或maximize
                                #,storage="sqlite:///db.sqlite3" # Specify the storage URL here.
                                #,study_name="quadratic-simple"
                               )
    #开始优化，n_trials为允许的最大迭代次数
    #由于参数空间已经在目标函数中定义好，因此不需要输入参数空间
    study.optimize(optuna_objective #目标函数
                   ,n_trials=n_trials #最大迭代次数（包括最初的观测值的）
                   ,show_progress_bar=True #要不要展示进度条呀？
                  )

    #可直接从优化好的对象study中调用优化的结果
    #打印最佳参数与最佳损失值
    print("\n","\n","best params: ", study.best_trial.params,
          "\n","\n","best score: ", study.best_trial.values,
          "\n")

    return study.best_trial.params, study.best_trial.values

In [5]:
def optimized_optuna_search_and_report(n_trials, algo):
    start_time = time.time()

    # 进行搜索优化
    best_params, best_score = optimizer_optuna(n_trials, algo)

    # 打印最佳参数和分数
    print("\n","\n","best params: ", best_params,
          "\n","\n","best score: ", best_score,
          "\n")

    end_time = time.time()
    time_random = (end_time - start_time) / 60  # 转换为分钟
    print(f"Optimization completed in {time_random} minutes.")

    return best_params, best_score, time_random

# 执行优化
best_params, best_score, time_random = optimized_optuna_search_and_report(30,optuna.samplers.NSGAIISampler())

[I 2023-09-20 09:15:19,752] A new study created in memory with name: no-name-ea8a0c3c-11d6-4f6f-bc27-e989a19898cc
Best trial: 0. Best value: 0.866335:   3%|█▌                                            | 1/30 [00:58<28:25, 58.79s/it]

[I 2023-09-20 09:16:18,567] Trial 0 finished with value: 0.8663347743490857 and parameters: {'n_estimators': 113, 'max_depth': 43, 'max_features': 12, 'min_samples_leaf': 21, 'min_samples_split': 7}. Best is trial 0 with value: 0.8663347743490857.


Best trial: 0. Best value: 0.866335:   7%|███                                           | 2/30 [02:17<32:53, 70.48s/it]

[I 2023-09-20 09:17:37,229] Trial 1 finished with value: 0.8161150120005217 and parameters: {'n_estimators': 104, 'max_depth': 30, 'max_features': 24, 'min_samples_leaf': 45, 'min_samples_split': 21}. Best is trial 0 with value: 0.8663347743490857.


Best trial: 2. Best value: 0.912327:  10%|████▌                                         | 3/30 [03:59<38:08, 84.74s/it]

[I 2023-09-20 09:19:18,947] Trial 2 finished with value: 0.9123267430244175 and parameters: {'n_estimators': 111, 'max_depth': 33, 'max_features': 20, 'min_samples_leaf': 2, 'min_samples_split': 22}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  13%|██████▏                                       | 4/30 [05:13<34:56, 80.63s/it]

[I 2023-09-20 09:20:33,262] Trial 3 finished with value: 0.908268511452769 and parameters: {'n_estimators': 107, 'max_depth': 32, 'max_features': 15, 'min_samples_leaf': 4, 'min_samples_split': 21}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  17%|███████▋                                      | 5/30 [06:45<35:21, 84.85s/it]

[I 2023-09-20 09:22:05,600] Trial 4 finished with value: 0.8720837659656981 and parameters: {'n_estimators': 103, 'max_depth': 49, 'max_features': 23, 'min_samples_leaf': 19, 'min_samples_split': 15}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  20%|█████████▏                                    | 6/30 [07:42<30:07, 75.31s/it]

[I 2023-09-20 09:23:02,401] Trial 5 finished with value: 0.9108894683134396 and parameters: {'n_estimators': 95, 'max_depth': 39, 'max_features': 13, 'min_samples_leaf': 6, 'min_samples_split': 2}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  23%|██████████▋                                   | 7/30 [09:36<33:40, 87.84s/it]

[I 2023-09-20 09:24:56,018] Trial 6 finished with value: 0.9062396279927587 and parameters: {'n_estimators': 144, 'max_depth': 41, 'max_features': 19, 'min_samples_leaf': 8, 'min_samples_split': 15}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  27%|████████████▎                                 | 8/30 [10:14<26:23, 71.99s/it]

[I 2023-09-20 09:25:34,075] Trial 7 finished with value: 0.8821448319122739 and parameters: {'n_estimators': 150, 'max_depth': 36, 'max_features': 6, 'min_samples_leaf': 14, 'min_samples_split': 11}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  30%|█████████████▊                                | 9/30 [11:15<24:03, 68.75s/it]

[I 2023-09-20 09:26:35,716] Trial 8 finished with value: 0.8842581032563144 and parameters: {'n_estimators': 67, 'max_depth': 36, 'max_features': 21, 'min_samples_leaf': 15, 'min_samples_split': 21}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  33%|███████████████                              | 10/30 [11:34<17:44, 53.21s/it]

[I 2023-09-20 09:26:54,135] Trial 9 finished with value: 0.8273595813846262 and parameters: {'n_estimators': 53, 'max_depth': 32, 'max_features': 9, 'min_samples_leaf': 38, 'min_samples_split': 11}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  37%|████████████████▌                            | 11/30 [13:50<24:56, 78.75s/it]

[I 2023-09-20 09:29:10,774] Trial 10 finished with value: 0.8455371104744988 and parameters: {'n_estimators': 150, 'max_depth': 40, 'max_features': 24, 'min_samples_leaf': 30, 'min_samples_split': 14}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  40%|██████████████████                           | 12/30 [15:53<27:36, 92.00s/it]

[I 2023-09-20 09:31:13,089] Trial 11 finished with value: 0.8240627351181912 and parameters: {'n_estimators': 150, 'max_depth': 39, 'max_features': 22, 'min_samples_leaf': 41, 'min_samples_split': 22}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  43%|███████████████████▌                         | 13/30 [16:58<23:47, 84.00s/it]

[I 2023-09-20 09:32:18,670] Trial 12 finished with value: 0.9095371176229852 and parameters: {'n_estimators': 108, 'max_depth': 33, 'max_features': 10, 'min_samples_leaf': 6, 'min_samples_split': 11}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  47%|█████████████████████                        | 14/30 [18:02<20:47, 77.97s/it]

[I 2023-09-20 09:33:22,716] Trial 13 finished with value: 0.8119726427418733 and parameters: {'n_estimators': 86, 'max_depth': 38, 'max_features': 20, 'min_samples_leaf': 47, 'min_samples_split': 21}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  50%|██████████████████████▌                      | 15/30 [19:19<19:21, 77.45s/it]

[I 2023-09-20 09:34:38,974] Trial 14 finished with value: 0.8785933922964334 and parameters: {'n_estimators': 55, 'max_depth': 39, 'max_features': 30, 'min_samples_leaf': 17, 'min_samples_split': 18}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  53%|████████████████████████                     | 16/30 [20:15<16:35, 71.14s/it]

[I 2023-09-20 09:35:35,458] Trial 15 finished with value: 0.8775795224453542 and parameters: {'n_estimators': 112, 'max_depth': 34, 'max_features': 11, 'min_samples_leaf': 17, 'min_samples_split': 3}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  57%|█████████████████████████▌                   | 17/30 [23:00<21:30, 99.29s/it]

[I 2023-09-20 09:38:20,215] Trial 16 finished with value: 0.8193283282013157 and parameters: {'n_estimators': 145, 'max_depth': 37, 'max_features': 30, 'min_samples_leaf': 44, 'min_samples_split': 15}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  60%|███████████████████████████                  | 18/30 [24:07<17:56, 89.68s/it]

[I 2023-09-20 09:39:27,510] Trial 17 finished with value: 0.8682794843796632 and parameters: {'n_estimators': 97, 'max_depth': 30, 'max_features': 16, 'min_samples_leaf': 20, 'min_samples_split': 20}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  63%|████████████████████████████▌                | 19/30 [25:18<15:24, 84.03s/it]

[I 2023-09-20 09:40:38,370] Trial 18 finished with value: 0.8158615624089685 and parameters: {'n_estimators': 118, 'max_depth': 49, 'max_features': 15, 'min_samples_leaf': 47, 'min_samples_split': 21}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  67%|██████████████████████████████               | 20/30 [25:44<11:05, 66.54s/it]

[I 2023-09-20 09:41:04,170] Trial 19 finished with value: 0.839534311842004 and parameters: {'n_estimators': 89, 'max_depth': 44, 'max_features': 7, 'min_samples_leaf': 33, 'min_samples_split': 9}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  70%|███████████████████████████████▍             | 21/30 [28:13<13:41, 91.24s/it]

[I 2023-09-20 09:43:33,001] Trial 20 finished with value: 0.884511481363002 and parameters: {'n_estimators': 135, 'max_depth': 42, 'max_features': 30, 'min_samples_leaf': 15, 'min_samples_split': 20}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  73%|█████████████████████████████████            | 22/30 [29:09<10:46, 80.80s/it]

[I 2023-09-20 09:44:29,436] Trial 21 finished with value: 0.8158615266665356 and parameters: {'n_estimators': 91, 'max_depth': 34, 'max_features': 20, 'min_samples_leaf': 45, 'min_samples_split': 19}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  77%|██████████████████████████████████▌          | 23/30 [29:48<07:56, 68.08s/it]

[I 2023-09-20 09:45:07,859] Trial 22 finished with value: 0.808421846489825 and parameters: {'n_estimators': 86, 'max_depth': 45, 'max_features': 15, 'min_samples_leaf': 50, 'min_samples_split': 18}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  80%|████████████████████████████████████         | 24/30 [30:23<05:50, 58.41s/it]

[I 2023-09-20 09:45:43,720] Trial 23 finished with value: 0.8140012760048538 and parameters: {'n_estimators': 129, 'max_depth': 33, 'max_features': 9, 'min_samples_leaf': 46, 'min_samples_split': 3}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  83%|█████████████████████████████████████▌       | 25/30 [31:04<04:25, 53.04s/it]

[I 2023-09-20 09:46:24,215] Trial 24 finished with value: 0.8640519051610285 and parameters: {'n_estimators': 54, 'max_depth': 41, 'max_features': 22, 'min_samples_leaf': 21, 'min_samples_split': 4}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  87%|███████████████████████████████████████      | 26/30 [31:36<03:07, 46.79s/it]

[I 2023-09-20 09:46:56,420] Trial 25 finished with value: 0.8640521196156259 and parameters: {'n_estimators': 129, 'max_depth': 37, 'max_features': 7, 'min_samples_leaf': 21, 'min_samples_split': 21}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  90%|████████████████████████████████████████▌    | 27/30 [32:42<02:37, 52.63s/it]

[I 2023-09-20 09:48:02,669] Trial 26 finished with value: 0.8743669568356509 and parameters: {'n_estimators': 78, 'max_depth': 44, 'max_features': 24, 'min_samples_leaf': 18, 'min_samples_split': 7}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  93%|██████████████████████████████████████████   | 28/30 [33:03<01:25, 42.97s/it]

[I 2023-09-20 09:48:23,115] Trial 27 finished with value: 0.8446914445125537 and parameters: {'n_estimators': 117, 'max_depth': 39, 'max_features': 5, 'min_samples_leaf': 28, 'min_samples_split': 11}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327:  97%|███████████████████████████████████████████▌ | 29/30 [34:05<00:48, 48.84s/it]

[I 2023-09-20 09:49:25,658] Trial 28 finished with value: 0.810788853364882 and parameters: {'n_estimators': 106, 'max_depth': 40, 'max_features': 20, 'min_samples_leaf': 48, 'min_samples_split': 4}. Best is trial 2 with value: 0.9123267430244175.


Best trial: 2. Best value: 0.912327: 100%|█████████████████████████████████████████████| 30/30 [34:39<00:00, 69.30s/it]

[I 2023-09-20 09:49:58,849] Trial 29 finished with value: 0.8317559006288882 and parameters: {'n_estimators': 82, 'max_depth': 45, 'max_features': 13, 'min_samples_leaf': 38, 'min_samples_split': 16}. Best is trial 2 with value: 0.9123267430244175.

 
 best params:  {'n_estimators': 111, 'max_depth': 33, 'max_features': 20, 'min_samples_leaf': 2, 'min_samples_split': 22} 
 
 best score:  [0.9123267430244175] 


 
 best params:  {'n_estimators': 111, 'max_depth': 33, 'max_features': 20, 'min_samples_leaf': 2, 'min_samples_split': 22} 
 
 best score:  [0.9123267430244175] 

Optimization completed in 34.651785385608676 minutes.





# 4. 贝叶斯优化

In [20]:
def optuna_objective(trial):

    #定义参数空间
    n_estimators = trial.suggest_int("n_estimators",50,150,1) #整数型，(参数名称，下界，上界，步长)
    max_depth = trial.suggest_int("max_depth",25,50,1)
    max_features = trial.suggest_int("max_features",5,30,1)
    min_samples_leaf = trial.suggest_int("min_samples_leaf",1,51,1)
    min_samples_split = trial.suggest_int("min_samples_split",2,22,1)

    #定义评估器
    #需要优化的参数由上述参数空间决定
    #不需要优化的参数则直接填写具体值
    rfc = RFC(n_estimators = n_estimators
              ,max_depth = max_depth
              ,max_features = max_features
              ,min_samples_leaf = min_samples_leaf
              ,min_samples_split = min_samples_split
              ,random_state=0
              ,verbose=False
              ,n_jobs=-1
             )

    #交叉验证过程，输出准确率(accuracy)
    cv = KFold(n_splits=5,shuffle=True,random_state=0)
    score = cross_val_score(rfc,X_dr,y
                            ,scoring='accuracy'
                            ,cv=cv #交叉验证模式
                            ,verbose=False #是否打印进程
                            ,n_jobs=-1 #线程数
                            ,error_score='raise' #如果在某个折叠中出现错误，会引发一个异常
                           )
    #最终输出平均准确率
    return np.mean(score)

In [21]:
def optimizer_optuna(n_trials, algo):

    #定义使用TPE或者GP
    if algo == "TPE":
        algo = optuna.samplers.TPESampler(n_startup_trials = 15, n_ei_candidates = 12)
    elif algo == "GP":
        from optuna.integration import SkoptSampler
        import skopt
        algo = SkoptSampler(skopt_kwargs={'base_estimator':'GP', #选择高斯过程
                                          'n_initial_points':30, #初始观测点10个
                                          'acq_func':'EI'} #选择的采集函数为EI，期望增量
                           )

    #实际优化过程，首先实例化优化器
    study = optuna.create_study(sampler = algo #要使用的具体算法
                                ,direction="maximize" #优化的方向，可以填写minimize或maximize
                               )
    #开始优化，n_trials为允许的最大迭代次数
    #由于参数空间已经在目标函数中定义好，因此不需要输入参数空间
    study.optimize(optuna_objective #目标函数
                   ,n_trials=n_trials #最大迭代次数（包括最初的观测值的）
                   ,show_progress_bar=True #要不要展示进度条呀？
                  )

    #可直接从优化好的对象study中调用优化的结果
    #打印最佳参数与最佳损失值
    print("\n","\n","best params: ", study.best_trial.params,
          "\n","\n","best score: ", study.best_trial.values,
          "\n")

    return study.best_trial.params, study.best_trial.values

In [22]:
def optimized_optuna_search_and_report(n_trials, algo):
    start_time = time.time()

    # 进行贝叶斯优化
    best_params, best_score = optimizer_optuna(n_trials, algo)

    # 打印最佳参数和分数
    print("\n","\n","best params: ", best_params,
          "\n","\n","best score: ", best_score,
          "\n")

    end_time = time.time()
    time_bayes = (end_time - start_time) / 60  # 转换为分钟
    print(f"Optimization completed in {time_bayes} minutes.")

    return best_params, best_score, time_bayes

# 执行优化
best_params, best_score, time_bayes = optimized_optuna_search_and_report(30, "TPE")

[I 2023-08-30 10:01:18,374] A new study created in memory with name: no-name-c3763327-ac2a-4515-9eea-debb9deded2e
Best trial: 0. Best value: 0.956714:   3%|█▍                                         | 1/30 [02:52<1:23:18, 172.35s/it]

[I 2023-08-30 10:04:10,723] Trial 0 finished with value: 0.9567135547815333 and parameters: {'n_estimators': 111, 'max_depth': 44, 'max_features': 30, 'min_samples_leaf': 2, 'min_samples_split': 9}. Best is trial 0 with value: 0.9567135547815333.


Best trial: 1. Best value: 0.96094:   7%|███▏                                           | 2/30 [03:23<41:45, 89.47s/it]

[I 2023-08-30 10:04:42,179] Trial 1 finished with value: 0.960940240439346 and parameters: {'n_estimators': 144, 'max_depth': 38, 'max_features': 6, 'min_samples_leaf': 16, 'min_samples_split': 20}. Best is trial 1 with value: 0.960940240439346.


Best trial: 1. Best value: 0.96094:  10%|████▋                                          | 3/30 [03:44<26:03, 57.91s/it]

[I 2023-08-30 10:05:02,529] Trial 2 finished with value: 0.9593337610511135 and parameters: {'n_estimators': 50, 'max_depth': 28, 'max_features': 10, 'min_samples_leaf': 11, 'min_samples_split': 19}. Best is trial 1 with value: 0.960940240439346.


Best trial: 1. Best value: 0.96094:  13%|██████▎                                        | 4/30 [04:49<26:24, 60.96s/it]

[I 2023-08-30 10:06:08,166] Trial 3 finished with value: 0.9404809859192685 and parameters: {'n_estimators': 98, 'max_depth': 44, 'max_features': 19, 'min_samples_leaf': 35, 'min_samples_split': 13}. Best is trial 1 with value: 0.960940240439346.


Best trial: 1. Best value: 0.96094:  17%|███████▊                                       | 5/30 [06:13<28:45, 69.00s/it]

[I 2023-08-30 10:07:31,433] Trial 4 finished with value: 0.9471594952453628 and parameters: {'n_estimators': 150, 'max_depth': 29, 'max_features': 16, 'min_samples_leaf': 35, 'min_samples_split': 13}. Best is trial 1 with value: 0.960940240439346.


Best trial: 1. Best value: 0.96094:  20%|█████████▍                                     | 6/30 [07:31<28:50, 72.09s/it]

[I 2023-08-30 10:08:49,519] Trial 5 finished with value: 0.9310121363430846 and parameters: {'n_estimators': 86, 'max_depth': 26, 'max_features': 25, 'min_samples_leaf': 35, 'min_samples_split': 21}. Best is trial 1 with value: 0.960940240439346.


Best trial: 1. Best value: 0.96094:  23%|██████████▉                                    | 7/30 [08:27<25:42, 67.07s/it]

[I 2023-08-30 10:09:46,265] Trial 6 finished with value: 0.9502876729710362 and parameters: {'n_estimators': 143, 'max_depth': 46, 'max_features': 11, 'min_samples_leaf': 42, 'min_samples_split': 8}. Best is trial 1 with value: 0.960940240439346.


Best trial: 7. Best value: 0.961194:  27%|████████████▎                                 | 8/30 [08:56<20:03, 54.72s/it]

[I 2023-08-30 10:10:14,542] Trial 7 finished with value: 0.9611938330006307 and parameters: {'n_estimators': 115, 'max_depth': 49, 'max_features': 6, 'min_samples_leaf': 11, 'min_samples_split': 20}. Best is trial 7 with value: 0.9611938330006307.


Best trial: 8. Best value: 0.962378:  30%|█████████████▊                                | 9/30 [09:51<19:12, 54.86s/it]

[I 2023-08-30 10:11:09,711] Trial 8 finished with value: 0.9623775508927567 and parameters: {'n_estimators': 129, 'max_depth': 39, 'max_features': 9, 'min_samples_leaf': 1, 'min_samples_split': 13}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  33%|███████████████                              | 10/30 [10:14<15:01, 45.06s/it]

[I 2023-08-30 10:11:32,831] Trial 9 finished with value: 0.9589957448633657 and parameters: {'n_estimators': 127, 'max_depth': 26, 'max_features': 5, 'min_samples_leaf': 27, 'min_samples_split': 18}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  37%|████████████████▌                            | 11/30 [10:39<12:21, 39.02s/it]

[I 2023-08-30 10:11:58,147] Trial 10 finished with value: 0.9452996377504428 and parameters: {'n_estimators': 55, 'max_depth': 32, 'max_features': 13, 'min_samples_leaf': 49, 'min_samples_split': 8}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  40%|██████████████████                           | 12/30 [11:12<11:05, 36.98s/it]

[I 2023-08-30 10:12:30,462] Trial 11 finished with value: 0.9610248427779734 and parameters: {'n_estimators': 116, 'max_depth': 30, 'max_features': 7, 'min_samples_leaf': 11, 'min_samples_split': 8}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  43%|███████████████████▌                         | 13/30 [13:14<17:50, 62.96s/it]

[I 2023-08-30 10:14:33,219] Trial 12 finished with value: 0.9296593210010027 and parameters: {'n_estimators': 122, 'max_depth': 28, 'max_features': 28, 'min_samples_leaf': 29, 'min_samples_split': 15}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  47%|█████████████████████                        | 14/30 [14:32<17:56, 67.29s/it]

[I 2023-08-30 10:15:50,506] Trial 13 finished with value: 0.9616166659816034 and parameters: {'n_estimators': 106, 'max_depth': 40, 'max_features': 17, 'min_samples_leaf': 3, 'min_samples_split': 3}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  50%|██████████████████████▌                      | 15/30 [16:03<18:36, 74.40s/it]

[I 2023-08-30 10:17:21,393] Trial 14 finished with value: 0.9584040288870342 and parameters: {'n_estimators': 110, 'max_depth': 38, 'max_features': 19, 'min_samples_leaf': 3, 'min_samples_split': 17}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 8. Best value: 0.962378:  53%|████████████████████████                     | 16/30 [17:18<17:25, 74.70s/it]

[I 2023-08-30 10:18:36,783] Trial 15 finished with value: 0.9463990392434042 and parameters: {'n_estimators': 87, 'max_depth': 35, 'max_features': 23, 'min_samples_leaf': 18, 'min_samples_split': 3}. Best is trial 8 with value: 0.9623775508927567.


Best trial: 16. Best value: 0.963223:  57%|████████████████████████▉                   | 17/30 [18:47<17:08, 79.10s/it]

[I 2023-08-30 10:20:06,107] Trial 16 finished with value: 0.9632232883395673 and parameters: {'n_estimators': 130, 'max_depth': 41, 'max_features': 16, 'min_samples_leaf': 3, 'min_samples_split': 2}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  60%|██████████████████████████▍                 | 18/30 [20:01<15:29, 77.48s/it]

[I 2023-08-30 10:21:19,810] Trial 17 finished with value: 0.9545150734775063 and parameters: {'n_estimators': 133, 'max_depth': 41, 'max_features': 14, 'min_samples_leaf': 20, 'min_samples_split': 11}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  63%|███████████████████████████▊                | 19/30 [20:54<12:51, 70.14s/it]

[I 2023-08-30 10:22:12,869] Trial 18 finished with value: 0.9617856919466938 and parameters: {'n_estimators': 136, 'max_depth': 35, 'max_features': 9, 'min_samples_leaf': 7, 'min_samples_split': 5}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  67%|█████████████████████████████▎              | 20/30 [22:01<11:30, 69.10s/it]

[I 2023-08-30 10:23:19,532] Trial 19 finished with value: 0.9443696196469006 and parameters: {'n_estimators': 71, 'max_depth': 42, 'max_features': 22, 'min_samples_leaf': 22, 'min_samples_split': 5}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  70%|██████████████████████████████▊             | 21/30 [23:07<10:15, 68.41s/it]

[I 2023-08-30 10:24:26,323] Trial 20 finished with value: 0.9599257629669079 and parameters: {'n_estimators': 124, 'max_depth': 49, 'max_features': 13, 'min_samples_leaf': 7, 'min_samples_split': 11}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  73%|████████████████████████████████▎           | 22/30 [24:03<08:37, 64.69s/it]

[I 2023-08-30 10:25:22,339] Trial 21 finished with value: 0.9608556738431513 and parameters: {'n_estimators': 136, 'max_depth': 35, 'max_features': 10, 'min_samples_leaf': 7, 'min_samples_split': 5}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 16. Best value: 0.963223:  77%|█████████████████████████████████▋          | 23/30 [24:46<06:46, 58.03s/it]

[I 2023-08-30 10:26:04,829] Trial 22 finished with value: 0.9613629304505869 and parameters: {'n_estimators': 135, 'max_depth': 35, 'max_features': 8, 'min_samples_leaf': 8, 'min_samples_split': 2}. Best is trial 16 with value: 0.9632232883395673.


Best trial: 23. Best value: 0.965421:  80%|███████████████████████████████████▏        | 24/30 [25:37<05:35, 55.91s/it]

[I 2023-08-30 10:26:55,802] Trial 23 finished with value: 0.9654213049919669 and parameters: {'n_estimators': 129, 'max_depth': 33, 'max_features': 9, 'min_samples_leaf': 1, 'min_samples_split': 5}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421:  83%|████████████████████████████████████▋       | 25/30 [26:48<05:02, 60.55s/it]

[I 2023-08-30 10:28:07,179] Trial 24 finished with value: 0.9611093378893021 and parameters: {'n_estimators': 100, 'max_depth': 32, 'max_features': 15, 'min_samples_leaf': 2, 'min_samples_split': 15}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421:  87%|██████████████████████████████████████▏     | 26/30 [27:48<04:01, 60.33s/it]

[I 2023-08-30 10:29:07,004] Trial 25 finished with value: 0.9584040646294673 and parameters: {'n_estimators': 127, 'max_depth': 39, 'max_features': 12, 'min_samples_leaf': 15, 'min_samples_split': 6}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421:  90%|███████████████████████████████████████▌    | 27/30 [29:48<03:54, 78.23s/it]

[I 2023-08-30 10:31:06,985] Trial 26 finished with value: 0.9617860136285896 and parameters: {'n_estimators': 121, 'max_depth': 43, 'max_features': 20, 'min_samples_leaf': 2, 'min_samples_split': 2}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421:  93%|█████████████████████████████████████████   | 28/30 [30:44<02:23, 71.50s/it]

[I 2023-08-30 10:32:02,799] Trial 27 finished with value: 0.963561090072718 and parameters: {'n_estimators': 145, 'max_depth': 33, 'max_features': 8, 'min_samples_leaf': 1, 'min_samples_split': 10}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421:  97%|██████████████████████████████████████████▌ | 29/30 [32:00<01:12, 72.90s/it]

[I 2023-08-30 10:33:18,959] Trial 28 finished with value: 0.9590804544292917 and parameters: {'n_estimators': 150, 'max_depth': 32, 'max_features': 12, 'min_samples_leaf': 12, 'min_samples_split': 6}. Best is trial 23 with value: 0.9654213049919669.


Best trial: 23. Best value: 0.965421: 100%|████████████████████████████████████████████| 30/30 [32:48<00:00, 65.63s/it]

[I 2023-08-30 10:34:07,139] Trial 29 finished with value: 0.962630964741877 and parameters: {'n_estimators': 139, 'max_depth': 36, 'max_features': 8, 'min_samples_leaf': 5, 'min_samples_split': 9}. Best is trial 23 with value: 0.9654213049919669.

 
 best params:  {'n_estimators': 129, 'max_depth': 33, 'max_features': 9, 'min_samples_leaf': 1, 'min_samples_split': 5} 
 
 best score:  [0.9654213049919669] 


 
 best params:  {'n_estimators': 129, 'max_depth': 33, 'max_features': 9, 'min_samples_leaf': 1, 'min_samples_split': 5} 
 
 best score:  [0.9654213049919669] 

Optimization completed in 32.812969688574476 minutes.





# 5. 最佳参数效果

In [3]:
X_train, X_test, Y_train, Y_test = train_test_split(X_dr, y, test_size=0.3, random_state=0)

In [13]:
# 默认参数
rfc_3 = RFC(n_estimators=1000,random_state=0)
rfc_3 = rfc_3.fit(X_train, Y_train)
pred_3 = rfc_3.predict(X_test)

# 遗传算法搜索评价指标
accuracy_score_3_train = rfc_3.score(X_train, Y_train)
accuracy_score_3_test = rfc_3.score(X_test, Y_test)
cm_3 = confusion_matrix(Y_test, pred_3)

In [14]:
accuracy_score_3_train, accuracy_score_3_test

(1.0, 0.9664694280078896)

In [None]:
# 遗传算法搜索最佳参数
rfc_1 = RFC(n_estimators=97
            ,max_depth=47
            ,max_features=12
            ,min_samples_leaf=9
            ,min_samples_split=19
            ,random_state=0
           )
rfc_1 = rfc_1.fit(X_train, Y_train)
pred_1 = rfc_1.predict(X_test)

# 遗传算法搜索评价指标
accuracy_score_1_train = rfc_1.score(X_train, Y_train)
accuracy_score_1_test = rfc_1.score(X_test, Y_test)
kappa_score_1 = cohen_kappa_score(Y_test, pred_1)
cm_1 = confusion_matrix(Y_test, pred_1)

In [21]:
accuracy_score_1_train, accuracy_score_1_test

(0.9661794902766034, 0.9602704987320372)

In [10]:
# 贝叶斯优化最佳参数
rfc_2 = RFC(n_estimators=129
            ,max_depth=33
            ,max_features=9
            ,min_samples_leaf=1
            ,min_samples_split=5
            ,random_state=0
           )
rfc_2 = rfc_2.fit(X_train, Y_train)
pred_2 = rfc_2.predict(X_test)

# 贝叶斯优化评价指标
accuracy_score_2_train = rfc_2.score(X_train, Y_train)
accuracy_score_2_test = rfc_2.score(X_test, Y_test)
kappa_score_2 = cohen_kappa_score(Y_test, pred_2)
cm_2 = confusion_matrix(Y_test, pred_2)
report = classification_report(Y_test, pred_2, output_dict=True)

In [11]:
accuracy_score_2_train, accuracy_score_2_test

(0.9955308612151226, 0.9642152719075796)

In [12]:
report = classification_report(Y_test, pred_2,output_dict=True)
# 获取加权平均的精确率、召回率、F1系数
weighted_precision = report['macro avg']['precision']
weighted_recall = report['macro avg']['recall']
weighted_f1 = report['macro avg']['f1-score']

print(f"macro Precision: {weighted_precision}")
print(f"macro Recall: {weighted_recall}")
print(f"macro F1 Score: {weighted_f1}")

macro Precision: 0.9616981229160063
macro Recall: 0.9619139535985805
macro F1 Score: 0.9615055485452823


In [13]:
class_accuracy = {label: metrics['precision'] for label, metrics in report.items() if label != 'accuracy'}
df = pd.DataFrame(list(class_accuracy.items()), columns=['Class', 'Accuracy'])
df

Unnamed: 0,Class,Accuracy
0,0,0.991667
1,1,1.0
2,2,0.990654
3,3,1.0
4,4,0.991525
5,5,0.991525
6,6,0.97541
7,7,0.872881
8,8,0.873786
9,9,0.956522


In [14]:
cm_2_ = confusion_matrix(Y_test, pred_2, normalize='pred') # 计算列归一化的混淆矩阵
cm_2_normalized = (np.round(cm_2_ * 100)).astype(int)

In [15]:
# 保存混淆矩阵
output_folder = 'D:/1_科研进展/小论文/小论文图片/混淆矩阵'

csv_filename_2 = 'final_RF.csv'
np.savetxt(f'{output_folder}/{csv_filename_2}', cm_2, delimiter=',', fmt='%d')

csv_filename_3 = 'final_RF_Normalized.csv'
np.savetxt(f'{output_folder}/{csv_filename_3}', cm_2_normalized, delimiter=',', fmt='%d')