In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Global Matplotlib styling: Times New Roman text, and an ASCII hyphen instead of
# the Unicode minus glyph on tick labels.
plt.rcParams.update({
    'font.family': 'Times New Roman',
    'axes.unicode_minus': False,
})
from sklearn.model_selection import train_test_split

# NOTE(review): the CSV path is empty here -- point it at the dataset before running.
df = pd.read_csv(r"")

# Features are every column except 'target'; 'target' is the regression label.
X = df.drop(columns=['target'])
y = df['target']


In [None]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

# Base XGBoost settings that stay fixed while the grid search varies the rest.
# The scikit-learn wrapper's documented names (`random_state`, `n_jobs`) are used
# instead of the legacy native aliases (`seed`, `nthread`).
params_xgb = {
    'booster': 'gbtree',              # Tree-based gradient boosting.
    'objective': 'reg:squarederror',  # Squared-error loss, i.e. a regression task.
    'max_leaves': 127,                # Upper bound on the number of leaves per tree.
    'verbosity': 1,                   # XGBoost log level: 0 = silent, 1 = warnings.
    'random_state': 42,               # Seed for the row/column subsampling below.
    'n_jobs': 1,                      # One thread per fit: GridSearchCV already runs fits in
                                      # parallel, and nesting all-core XGBoost inside it
                                      # causes thread contention.
    'colsample_bytree': 0.6,          # Fraction of features sampled for each tree.
    'subsample': 0.7,                 # Fraction of rows sampled for each boosting round.
    'eval_metric': 'rmse'             # Metric reported when an eval_set is supplied.
}

# Initialize the XGBoost regression model
model_xgb = xgb.XGBRegressor(**params_xgb)

# Hyper-parameter grid: 5 x 5 x 4 = 100 candidate configurations.
param_grid = {
    'n_estimators': [100, 200, 300, 400, 500],  # Number of trees
    'max_depth': [3, 4, 5, 6, 7],               # Maximum tree depth
    'learning_rate': [0.01, 0.02, 0.05, 0.1],   # Shrinkage per boosting step
}

# Exhaustive grid search scored with 10-fold cross-validation on the training set.
grid_search = GridSearchCV(
    estimator=model_xgb,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',  # Negative MSE, so larger is better.
    cv=10,                             # 10-fold cross-validation.
    n_jobs=-1,                         # Run candidate/fold fits in parallel on all cores.
    verbose=1                          # Print a progress summary.
)

# Model training
grid_search.fit(X_train, y_train)

# Estimator refitted on the full training set with the best parameter combination.
best_model_xgboost = grid_search.best_estimator_

In [None]:
import shap
# Explain the tuned model with SHAP's tree explainer. Values are computed for
# every row of X (the full feature table, not only the training split).
explainer = shap.TreeExplainer(best_model_xgboost)
shap_values = explainer.shap_values(X)

# Wrap the SHAP matrix in a DataFrame so each column carries its feature name,
# then preview the first rows.
shap_values_df = pd.DataFrame(data=shap_values, columns=X.columns)
shap_values_df.head(n=5)

In [None]:
# SHAP dependence plot for feature 'ST'
shap.dependence_plot(
    'ST',
    shap_values,
    X,
    interaction_index=None,  # no colouring by an interaction feature
    show=False               # keep the figure open so it can be saved below
)
# Feature-specific file name so each dependence plot is kept as its own file.
plt.savefig("6_ST.png", format='png', bbox_inches='tight', dpi=1200)
plt.show()

In [None]:
# SHAP dependence plot for feature 'TC'
shap.dependence_plot(
    'TC',
    shap_values,
    X,
    interaction_index=None,  # no colouring by an interaction feature
    show=False               # keep the figure open so it can be saved below
)
# Feature-specific file name so each dependence plot is kept as its own file.
plt.savefig("6_TC.png", format='png', bbox_inches='tight', dpi=1200)
plt.show()

In [None]:
# SHAP dependence plot for feature 'TOT'
shap.dependence_plot(
    'TOT',
    shap_values,
    X,
    interaction_index=None,  # no colouring by an interaction feature
    show=False               # keep the figure open so it can be saved below
)
# Feature-specific file name so each dependence plot is kept as its own file.
plt.savefig("6_TOT.png", format='png', bbox_inches='tight', dpi=1200)
plt.show()

In [None]:
import matplotlib.pyplot as plt
from pygam import LinearGAM, s


# Construct the GAM model: one smooth spline term relating the raw 'ST' values
# to their SHAP values.
X1 = X['ST'].values.reshape(-1, 1)  # single feature as a 2-D column for the GAM
y1 = shap_values_df['ST'].values

gam = LinearGAM(s(0))
gam.fit(X1, y1)

# Evaluate the fitted smooth on a grid spanning the feature range.
XX = gam.generate_X_grid(term=0)
y_pred = gam.predict(XX)
# NOTE(review): prediction_intervals() is used here, while the legend below calls
# the band "95% CI" -- confirm whether confidence_intervals() was intended.
confidence_interval = gam.prediction_intervals(XX, width=0.95)
lower_bound = confidence_interval[:, 0]
upper_bound = confidence_interval[:, 1]

# Draw on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(6, 4), dpi=1200)

# Fitted trend (solid blue line)
ax.plot(XX, y_pred, color="blue", linewidth=2, label="Trend (GAM)")

# Interval band (translucent blue)
ax.fill_between(
    XX.flatten(),
    lower_bound,
    upper_bound,
    color="blue",
    alpha=0.2,
    label="95% CI"
)

# Reference line at SHAP = 0
ax.axhline(y=0, color='black', linestyle='-.', linewidth=1)

# Axis labels; hide the top and right spines
ax.set_xlabel('ST', fontsize=12)
ax.set_ylabel('SHAP value', fontsize=12)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)

ax.legend()

# Save, then display
fig.savefig("1.png", format='png', bbox_inches='tight')
plt.show()

In [None]:
# Fit a univariate GAM to the SHAP values of 'ST', then plot the fitted trend, its
# interval band, and the points where the trend crosses SHAP = 0 ("tipping points").
#
# Dedicated arrays are used instead of rebinding X / y: overwriting the feature
# DataFrame and the target would break every later use of them and make this
# cell fail when it is run a second time.
X1 = X['ST'].values.reshape(-1, 1)  # single feature as a 2-D column for the GAM
y1 = shap_values_df['ST'].values

# Construct GAM Model (univariate smoothing)
gam = LinearGAM(s(0)).fit(X1, y1)

# Evaluate the smooth on a 1000-point grid over the feature range
XX = gam.generate_X_grid(term=0, n=1000)  # Grid points, shape (1000, 1)
x_grid = XX[:, 0]                         # Same grid as a 1-D array for plotting
y_pred = gam.predict(XX)                  # Predicted values from the GAM fit
# NOTE(review): these are prediction intervals; switch to confidence_intervals()
# if an interval for the mean trend is what the figure should show.
confidence_interval = gam.prediction_intervals(XX, width=0.95)

# Explained deviance (pseudo R-squared), converted to a percentage
R2 = gam.statistics_['pseudo_r2']['explained_deviance'] * 100

# Indices i where the fitted curve changes sign between grid points i and i + 1
zero_crossing_indices = np.where(np.diff(np.sign(y_pred)))[0]

# Vertical extent of the plot and of the shaded regions
y_min, y_max = y_pred.min() - 0.05, y_pred.max() + 0.05

# Plot figure
plt.figure(figsize=(6, 4), dpi=1200)

# Plot y=0 (gray dashed line)
plt.axhline(y=0, color='gray', linestyle='--', linewidth=1, zorder=0)

# Plot the interval bounds (dashed lines, color #9FD4AE)
plt.plot(x_grid, confidence_interval[:, 0], color="#9FD4AE", linestyle="--", linewidth=1, zorder=2)
plt.plot(x_grid, confidence_interval[:, 1], color="#9FD4AE", linestyle="--", linewidth=1, zorder=2)

# Plot the trend line (solid line, color #6A9ACF)
plt.plot(x_grid, y_pred, color="#6A9ACF", linewidth=2, zorder=3)

# Mark every zero crossing with a gray vertical line and a red dot.
# XX is 2-D, so index [idx, 0] to pass plain scalars to Matplotlib.
for idx in zero_crossing_indices:
    tipping_point_x = XX[idx, 0]
    tipping_point_y = y_pred[idx]
    plt.axvline(x=tipping_point_x, color='gray', linestyle='--', linewidth=1, zorder=1)
    plt.scatter(tipping_point_x, tipping_point_y, color='red', zorder=4)

# Label only the first crossing as "Tipping Point"
if len(zero_crossing_indices) > 0:
    first_idx = zero_crossing_indices[0]
    tipping_point_x = XX[first_idx, 0]
    tipping_point_y = y_pred[first_idx]
    plt.text(
        tipping_point_x, tipping_point_y + 0.01,
        "Tipping Point",
        fontsize=10,
        color="red"
    )

# Masks for the shaded regions
y_positive = y_pred > 0   # Fitted SHAP value above zero
y_negative = y_pred <= 0  # Fitted SHAP value at or below zero

# Shade the x-ranges where the fitted SHAP value is positive
plt.fill_between(
    x_grid,
    y_min,
    y_max,
    where=y_positive,
    color="#FCDFBE",
    alpha=0.4
)

# Shade the x-ranges where the fitted SHAP value is zero or negative
plt.fill_between(
    x_grid,
    y_min,
    y_max,
    where=y_negative,
    color="#ABDAFC",
    alpha=0.4
)

# P-value of the smooth term, taken from gam.statistics_
P_value = gam.statistics_['p_values'][0]

# Choose the annotation text according to the P-value range
if P_value < 0.001:
    P_text = r"$\mathit{p < 0.001}$"
elif P_value < 0.05:
    P_text = r"$\mathit{p < 0.05}$"
else:
    P_text = r"$\mathit{p > 0.05}$"

# Annotate the fit quality near the bottom-left corner, with the P-value on the
# line below it. The backslash is doubled so "\%" reaches mathtext unchanged
# without relying on an invalid string escape.
plt.text(
    XX.min() + 0.1 * (XX.max() - XX.min()),
    y_min + 0.1 * (y_max - y_min),
    f"$R^2={R2:.2f}\\%$\n{P_text}",
    fontsize=10,
    color="black"
)

# Axis settings
plt.xlabel('ST', fontsize=12)
plt.ylabel('SHAP value', fontsize=12)
plt.xlim(XX.min(), XX.max())
plt.ylim(y_min, y_max)
ax = plt.gca()
ax.spines['top'].set_visible(True)
ax.spines['right'].set_visible(True)
plt.savefig("5.png", format='png', bbox_inches='tight')

# Show figure
plt.show()