In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (mean_absolute_error, mean_squared_error, r2_score)
from xgboost import XGBRegressor
import shap 


class XGBoostRegressorWrapper:
    """Thin holder that builds an ``XGBRegressor`` and exposes it as ``self.model``.

    All constructor arguments are forwarded to ``XGBRegressor`` as keyword
    arguments. Two of them are renamed on the way through: ``alpha`` is passed
    as ``reg_alpha`` and ``lambd`` is passed as ``reg_lambda``. The wrapper
    defines no methods of its own; callers fit and predict through
    ``wrapper.model``.
    """

    def __init__(self, n_estimators=100, max_depth=6, learning_rate=0.1,
                 min_child_weight=1, gamma=0, subsample=1, colsample_bytree=1,
                 alpha=0, lambd=1, random_state=42, n_jobs=-1):
        # Collect the estimator settings first so the renamed keys are easy to spot.
        xgb_kwargs = {
            'n_estimators': n_estimators,
            'max_depth': max_depth,
            'learning_rate': learning_rate,
            'min_child_weight': min_child_weight,
            'gamma': gamma,
            'subsample': subsample,
            'colsample_bytree': colsample_bytree,
            'reg_alpha': alpha,    # wrapper name: alpha
            'reg_lambda': lambd,   # wrapper name: lambd
            'random_state': random_state,
            'n_jobs': n_jobs,
        }
        self.model = XGBRegressor(**xgb_kwargs)

# Load the spreadsheet and discard its first column
# (presumably an index/ID column — confirm against data.xlsx).
df = pd.read_excel('data.xlsx').iloc[:, 1:]
y = df['Fu']                 # regression target
X = df.drop(columns=['Fu'])  # every remaining column is a feature
results = []

# Hold out 20% of the rows for testing; the same seed is reused for the model below.
seed = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=seed
)

# Standardise the features; the scaler is fitted on the training rows only.
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

# Standardise the target the same way. The scaler needs a 2-D column, so the
# Series is reshaped first and the result is flattened back to 1-D.
scaler_y = StandardScaler()
y_train_np = y_train.values.reshape(-1, 1)
y_train_scaled = scaler_y.fit_transform(y_train_np).flatten()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).flatten()

# Create and train the model; edit the hyperparameters here.
hyperparams = {
    'n_estimators': 199,
    'max_depth': 4,
    'learning_rate': 0.275750472,
    'min_child_weight': 2,
    'gamma': 0.001,
    'subsample': 0.8,
    'colsample_bytree': 1,
    'alpha': 0.013362347,
    'lambd': 14.99774392,
    'random_state': seed,
}
model = XGBoostRegressorWrapper(**hyperparams)
model.model.fit(X_train_scaled, y_train_scaled)

# Predict in the standardised target space ...
y_train_pred = model.model.predict(X_train_scaled)
y_test_pred = model.model.predict(X_test_scaled)

# ... then map the predictions back to the target's original scale.
y_train_pred_original = scaler_y.inverse_transform(
    y_train_pred.reshape(-1, 1)
).flatten()
y_test_pred_original = scaler_y.inverse_transform(
    y_test_pred.reshape(-1, 1)
).flatten()

In [None]:
# Calculate SHAP values, draw the summary plot, and export the values.
import matplotlib.pyplot as plt

# Mathtext axis labels, one per feature column, in column order.
# Raw strings are required: '\i' is not a valid Python escape sequence, so the
# non-raw form triggers an invalid-escape warning on current Python versions.
# The resulting string values are unchanged.
feature_label = [
    r'$\it{h}$$_{a}$',
    r'$\it{T}$$_{a}$',
    r'$\it{E}$$_{a}$',
    r'$\it{f}$$_{a}$',
    r'$\it{ε}$$_{a}$',
    r'$\it{E}$$_{c}$',
    r'$\it{f}$$_{c}$',
    r'$\it{b}$$_{f}$',
    r'$\it{h}$$_{c}$',
    r'$\it{L}$$_{b}$',
    r'$\it{ε}$$_{c}$',
    r'$\it{S}$$_{t}$',
    r'$\it{f}$$_{s}$',
    r'$\it{v}$$_{s}$',
    r'$\it{n}$$_{l}$',
]

# Fail early (before the SHAP computation) if the hard-coded labels no longer
# line up with the feature matrix; otherwise the plot would be mislabelled.
if len(feature_label) != X_train_scaled.shape[1]:
    raise ValueError(
        f"feature_label has {len(feature_label)} entries but the feature "
        f"matrix has {X_train_scaled.shape[1]} columns"
    )

# NOTE(review): the whole scaled training set is used both as the background
# data and as the rows to explain. KernelExplainer cost grows with both, so this
# cell may be slow; consider a summarised background (e.g. shap.sample or
# shap.kmeans) if runtime becomes a problem. Kept as-is so the exported values
# do not change.
explainer = shap.KernelExplainer(model.model.predict, X_train_scaled)
shap_values = explainer.shap_values(X_train_scaled)

# If the explainer returned a list of arrays, keep only the first one.
if isinstance(shap_values, list):
    shap_values = shap_values[0]

# Figure styling: serif (Times New Roman) font and black text/ticks.
plt.rcParams.update({
    'font.family': 'serif',
    'font.serif': 'Times New Roman',
    'font.size': 13,
    'text.color': 'black',
    'axes.labelcolor': 'black',
    'xtick.color': 'black',
    'ytick.color': 'black'
})

# show=False keeps the figure open so it can be resized and saved below.
shap.summary_plot(shap_values, X_train_scaled, feature_names=feature_label,
                  cmap='viridis', show=False)

plt.gcf().set_size_inches(7, 6)

# Save the figure as a high-resolution image file.
plt.savefig('picture1.png', dpi=300, bbox_inches='tight')

# Export the SHAP values using the original column names of X.
shap_values_df = pd.DataFrame(shap_values, columns=X.columns)
shap_values_df.to_excel('shap_values-XGBoost.xlsx', index=False)
