In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor  
from sklearn.linear_model import LinearRegression  
from xgboost import XGBRegressor 
from sklearn.linear_model import LinearRegression  
from sklearn.model_selection import cross_val_score,train_test_split,cross_val_predict
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error,log_loss, accuracy_score
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.cm as cm 
 
data = pd.read_excel(r'd:\Desktop\1222\1225复合毒性数据.xlsx')

newdata = pd.get_dummies(data)
pd.DataFrame(newdata)

x = newdata.drop('Survival(%)',axis=1)
y = newdata['Survival(%)']

x.columns = x.columns.astype(str)
x = StandardScaler().fit_transform(x)
x = pd.DataFrame(x)


In [None]:
# 使用最佳参数初始化基础学习器  
best_rf = RandomForestRegressor(n_estimators=85, max_depth=19, random_state=55)  
best_gb = GradientBoostingRegressor(n_estimators=199, random_state=138, max_depth=7)  
best_xgb = XGBRegressor(n_estimators=100, learning_rate=0.3, max_depth=6, random_state=60, gamma=0)  

# 定义基础学习器  
estimators = [  
    ('rf', best_rf),  
    ('gbdt', best_gb),  
    ('xgb', best_xgb)  
]  

# 定义堆叠回归器  
stacking_reg = StackingRegressor(  
    estimators=estimators,  
    final_estimator=LinearRegression(),  
    cv=5  
)   

# 存储交叉验证和测试集结果  
results = []  

# 设置 random_state 循环  
for random_state in range(100):  # 你可以根据需求选择不同的循环范围  
    # 划分数据集  
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=random_state)  

    # 训练堆叠回归器  
    stacking_reg.fit(X_train, y_train)  

    # 计算交叉验证 R² 和 RMSE（使用训练集进行交叉验证）  
    cv_r2 = cross_val_score(stacking_reg, X_train, y_train, cv=5, scoring='r2')  
    cv_rmse = -cross_val_score(stacking_reg, X_train, y_train, cv=5, scoring='neg_root_mean_squared_error')  
    cv_mae = -cross_val_score(stacking_reg, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')  

    # 在测试集上进行预测  
    y_pred = stacking_reg.predict(X_test)  

    # 计算测试集的 R² 和 RMSE  
    test_r2 = r2_score(y_test, y_pred)  
    test_rmse = mean_squared_error(y_test, y_pred, squared=False)  
    test_mae = mean_absolute_error(y_test, y_pred)  

    # 存储结果  
    results.append((random_state, cv_r2, cv_rmse, cv_mae, test_r2, test_rmse, test_mae))  

# 输出结果  
print("\n交叉验证和测试集结果：")  
for state, cv_r2, cv_rmse, cv_mae, test_r2, test_rmse, test_mae in results:  
    print(f'random_state={state}, 交叉验证 R²得分: {np.mean(cv_r2):.2f}, 交叉验证 RMSE: {np.mean(cv_rmse):.2f}, '  
          f'交叉验证 MAE: {np.mean(cv_mae):.2f}, 测试集 R²: {test_r2:.2f}, 测试集 RMSE: {test_rmse:.2f}, 测试集 MAE: {test_mae:.2f}')


交叉验证和测试集结果：
random_state=0, 交叉验证 R²得分: 0.77, 交叉验证 RMSE: 16.95, 交叉验证 MAE: 10.54, 测试集 R²: 0.82, 测试集 RMSE: 14.91, 测试集 MAE: 8.96
random_state=1, 交叉验证 R²得分: 0.73, 交叉验证 RMSE: 18.25, 交叉验证 MAE: 11.37, 测试集 R²: 0.83, 测试集 RMSE: 14.40, 测试集 MAE: 10.49
random_state=2, 交叉验证 R²得分: 0.73, 交叉验证 RMSE: 17.73, 交叉验证 MAE: 11.84, 测试集 R²: 0.77, 测试集 RMSE: 18.37, 测试集 MAE: 12.10
random_state=3, 交叉验证 R²得分: 0.83, 交叉验证 RMSE: 14.75, 交叉验证 MAE: 9.95, 测试集 R²: 0.78, 测试集 RMSE: 15.83, 测试集 MAE: 8.16
random_state=4, 交叉验证 R²得分: 0.81, 交叉验证 RMSE: 15.40, 交叉验证 MAE: 10.05, 测试集 R²: 0.75, 测试集 RMSE: 17.71, 测试集 MAE: 11.70
random_state=5, 交叉验证 R²得分: 0.77, 交叉验证 RMSE: 16.69, 交叉验证 MAE: 10.90, 测试集 R²: 0.90, 测试集 RMSE: 11.47, 测试集 MAE: 7.93
random_state=6, 交叉验证 R²得分: 0.80, 交叉验证 RMSE: 15.97, 交叉验证 MAE: 10.99, 测试集 R²: 0.88, 测试集 RMSE: 11.73, 测试集 MAE: 7.80
random_state=7, 交叉验证 R²得分: 0.83, 交叉验证 RMSE: 14.47, 交叉验证 MAE: 9.95, 测试集 R²: 0.83, 测试集 RMSE: 14.49, 测试集 MAE: 9.14
random_state=8, 交叉验证 R²得分: 0.75, 交叉验证 RMSE: 17.08, 交叉验证 MAE: 11.74, 测试集 R²: 0.93, 