In [None]:
import os
import numpy as np
import pandas as pd
import scipy.io as scio
from scipy.io import savemat  
import scipy.io as sio
from sklearn.metrics import r2_score
import shap
import matplotlib.pyplot as plt
from pandas import Series,DataFrame
from mealpy import FloatVar,SSA,WOA,AVOA,SRSR,SLO,FOX,SeaHO,PSO
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, KFold

In [None]:
# Load the dataset. The first column is discarded (presumably an index/ID
# column -- confirm against data.xlsx) and 'Fu' is the regression target.
df = pd.read_excel('data.xlsx').iloc[:, 1:]
X = df.drop(['Fu'], axis=1).values
y = df['Fu'].values

# Hold out 20% of the rows for testing; the fixed random_state keeps the split
# identical between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
data_train_x = X_train
data_train_y = y_train
data_test_x = X_test
data_test_y = y_test

# Standardization statistics are computed on the training split only and then
# applied to both splits.
X_mean, y_mean = data_train_x.mean(0), data_train_y.mean(0)
X_std, y_std = data_train_x.std(0), data_train_y.std(0)

# A constant column has zero standard deviation; dividing by it would fill the
# standardized data with NaN/inf. Use a unit scale for such columns instead,
# which leaves them at 0 after centering.
X_std = np.where(X_std == 0, 1.0, X_std)
if y_std == 0:
    y_std = 1.0

data_train_x_nor = (data_train_x - X_mean) / X_std
data_test_x_nor = (data_test_x - X_mean) / X_std

data_train_y_nor = (data_train_y - y_mean) / y_std
data_test_y_nor = (data_test_y - y_mean) / y_std

In [None]:
# Evaluation Function

def evaluate_regress(y_pre, y_true):
    """Print and return regression error metrics for a set of predictions.

    Both inputs are flattened to 1-D before any arithmetic, so a prediction of
    shape (n,) compared with a target of shape (n, 1) is evaluated
    element-wise instead of being silently broadcast to an (n, n) matrix.

    Parameters
    ----------
    y_pre : array-like
        Predicted values.
    y_true : array-like
        Ground-truth values; must contain as many elements as ``y_pre``.

    Returns
    -------
    tuple
        ``(MAE, MAPE, MSE, RMSE, R2)``. MAPE is returned as a fraction (it is
        not multiplied by 100) and divides by ``y_true``, so it is not finite
        when ``y_true`` contains zeros.

    Raises
    ------
    ValueError
        If ``y_pre`` and ``y_true`` do not contain the same number of elements.
    """
    y_pre = np.asarray(y_pre).ravel()
    y_true = np.asarray(y_true).ravel()
    if y_pre.shape != y_true.shape:
        raise ValueError(
            f"y_pre has {y_pre.size} elements but y_true has {y_true.size}"
        )

    n_samples = len(y_true)
    residual = y_pre - y_true

    MAE = np.sum(np.abs(residual)) / n_samples
    print('MAE为: ',str(MAE))

    MAPE = np.sum(np.abs(residual / y_true)) / n_samples
    print('MAPE为: ',str(MAPE))

    MSE = np.sum(residual ** 2) / n_samples
    print('MSE为: ',str(MSE))

    RMSE = np.sqrt(MSE)
    print('RMSE为: ',str(RMSE))

    R2 = r2_score(y_true, y_pre)
    print('R2为: ',str(R2))

    return MAE, MAPE, MSE, RMSE, R2

In [None]:
import time
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

# Wall-clock timestamp taken before the hyper-parameter search begins.
overall_start_time = time.time()

# Module-level scalers used by evaluate_model, which refits them on the
# training part of every cross-validation fold.
scaler_x, scaler_y = StandardScaler(), StandardScaler()

# Parallel lists filled by evaluate_model: recorded fitness values and the
# candidate solutions that produced them.
best_fitness_history, best_params_history = [], []

def evaluate_model(solution):
    """Objective function: mean 5-fold cross-validated MAE of an MLP regressor.

    Parameters
    ----------
    solution : sequence of float
        Candidate position. ``solution[0]`` and ``solution[1]`` are truncated
        with ``int`` to give the widths of the two hidden layers;
        ``solution[2]`` is used as ``learning_rate_init``.

    Returns
    -------
    float
        Mean of the per-fold MAE values, measured on the standardized target,
        or ``inf`` if no fold produced a score.

    Side effects
    ------------
    Refits the module-level ``scaler_x`` / ``scaler_y`` on each training fold
    and appends to ``best_fitness_history`` / ``best_params_history``.
    """
    # Maximum number of entries kept in the two history lists.
    history_limit = 150

    hidden_layer_sizes = (int(solution[0]), int(solution[1]))
    learning_rate = solution[2]

    # Fixed seed: every candidate is scored on the same five folds.
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    mae_scores = []

    for train_index, val_index in kf.split(data_train_x):
        X_train_fold, X_val_fold = data_train_x[train_index], data_train_x[val_index]
        y_train_fold, y_val_fold = data_train_y[train_index], data_train_y[val_index]

        # Scaling statistics come from the training fold only and are then
        # applied to the validation fold.
        X_train_fold = scaler_x.fit_transform(X_train_fold).astype(np.float32)
        X_val_fold = scaler_x.transform(X_val_fold).astype(np.float32)

        y_train_fold = scaler_y.fit_transform(y_train_fold.reshape(-1, 1)).flatten()
        y_val_fold = scaler_y.transform(y_val_fold.reshape(-1, 1)).flatten()

        model = MLPRegressor(hidden_layer_sizes=hidden_layer_sizes, activation='relu',
                             solver='adam', learning_rate_init=learning_rate,
                             max_iter=2000,
                             random_state=42, verbose=0)
        model.fit(X_train_fold, y_train_fold)

        y_pred = model.predict(X_val_fold)
        mae_scores.append(mean_absolute_error(y_val_fold, y_pred))

    if not mae_scores:
        return float('inf')

    mean_mae = np.mean(mae_scores)

    # NOTE(review): while the history holds fewer than `history_limit` entries
    # every evaluation is recorded, improvement or not; after that only scores
    # lower than the most recent entry are added and the oldest entry is
    # dropped. The list is therefore not a best-so-far curve -- confirm this
    # is the intended behaviour.
    if len(best_fitness_history) < history_limit or mean_mae < best_fitness_history[-1]:
        best_fitness_history.append(mean_mae)
        # Store a snapshot rather than a reference: the caller owns `solution`
        # and may modify the same array after this function returns.
        best_params_history.append(np.array(solution, copy=True))

    if len(best_fitness_history) > history_limit:
        best_fitness_history.pop(0)
        best_params_history.pop(0)

    return mean_mae

# Optimization problem handed to the optimizer: minimise evaluate_model over
# three continuous variables.
param_grid = {
    "obj_func": evaluate_model,
    "bounds": [
        FloatVar(lb=8, ub=128),     # hidden layer 1 width (truncated with int() in evaluate_model)
        FloatVar(lb=8, ub=128),     # hidden layer 2 width (truncated with int() in evaluate_model)
        FloatVar(lb=0.01, ub=0.1),  # learning_rate_init of the MLP
    ],
    "minmax": "min"
}

# Set parameters for the AVOA algorithm
epoch = 150
pop_size = 15
AVOA_model = AVOA.OriginalAVOA(epoch=epoch, pop_size=pop_size)

# Solve the optimization problem. The search is seeded with the same value
# used for the data split, the CV folds and the MLP so that a fresh
# Restart & Run All reproduces the same best hyper-parameters.
AVOA_best = AVOA_model.solve(param_grid, seed=42)

# Best position returned by the optimizer, decoded the same way the objective
# function decodes a candidate: two integer layer widths and a learning rate.
final_best_params = AVOA_best.solution
best_hidden_layers = tuple(int(width) for width in final_best_params[:2])
best_learning_rate = final_best_params[2]

# Tabulate the recorded fitness values next to the decoded hyper-parameters
# of the candidates that produced them.
history_columns = {
    'Best_Fitness': best_fitness_history,
    'Hidden_Layer_1': [int(candidate[0]) for candidate in best_params_history],
    'Hidden_Layer_2': [int(candidate[1]) for candidate in best_params_history],
    'Learning_Rate': [candidate[2] for candidate in best_params_history],
}
best_history_df = pd.DataFrame(history_columns)

# One-row table holding the raw (un-truncated) winning position.
final_param_names = [f'Final_Param_{idx}' for idx in range(1, len(final_best_params) + 1)]
final_params_df = pd.DataFrame([final_best_params], columns=final_param_names)

# Write both tables to one workbook, one sheet each.
output_path = 'optimized_paras_AVOA_MLP.xlsx'
with pd.ExcelWriter(output_path) as writer:
    best_history_df.to_excel(writer, sheet_name='Best_Parameters', index=False)
    final_params_df.to_excel(writer, sheet_name='Final_Best_Parameters', index=False)




# Refit an MLP with the selected hyper-parameters on the whole standardized
# training split.
best_model = MLPRegressor(
    hidden_layer_sizes=best_hidden_layers,
    activation='relu',
    solver='adam',
    learning_rate_init=best_learning_rate,
    max_iter=2000,
    random_state=42,
)
best_model.fit(data_train_x_nor, data_train_y_nor)

# Predictions in standardized target units.
y_pred_test_nor = best_model.predict(data_test_x_nor)
y_pred_train_nor = best_model.predict(data_train_x_nor)

# Undo the target standardization using the training-split mean and std.
y_pred_test = y_pred_test_nor * y_std + y_mean
y_pred_train = y_pred_train_nor * y_std + y_mean

# Column-vector views of predictions and targets, passed to the metric helper.
y_pred_test1 = y_pred_test.reshape(-1, 1)
data_test_y1 = data_test_y.reshape(-1, 1)
y_pred_train1 = y_pred_train.reshape(-1, 1)
data_train_y1 = data_train_y.reshape(-1, 1)

# Error metrics: T_* for the test split, R_* for the training split.
T_MAE, T_MAPE, T_MSE, T_RMSE, T_R2 = evaluate_regress(y_pred_test1, data_test_y1)
R_MAE, R_MAPE, R_MSE, R_RMSE, R_R2 = evaluate_regress(y_pred_train1, data_train_y1)


# Metric tables for the two splits; both list the metrics in the same order.
metric_names = ['MAE', 'MAPE', 'MSE', 'RMSE', 'R2']
errors_test = pd.DataFrame({
    'test—Metric': metric_names,
    'test—Value': [T_MAE, T_MAPE, T_MSE, T_RMSE, T_R2],
})
errors_train = pd.DataFrame({
    'train—Metric': metric_names,
    'train—Value': [R_MAE, R_MAPE, R_MSE, R_RMSE, R_R2],
})

# Flat vectors of predictions and true values: training rows first, then
# test rows.
predictions = np.concatenate((y_pred_train.ravel(), y_pred_test.ravel()))
truevalues = np.concatenate((data_train_y.ravel(), data_test_y.ravel()))

results_df = pd.DataFrame({'Predictions': predictions, 'True Values': truevalues})

# Write the three tables to one workbook, one sheet per table.
output_path_results = 'results_optimized_AVOA_MLP.xlsx'
sheets_to_write = [
    (errors_test, 'Test_Errors'),
    (errors_train, 'Train_Errors'),
    (results_df, 'Predictions'),
]
with pd.ExcelWriter(output_path_results) as writer:
    for frame, sheet in sheets_to_write:
        frame.to_excel(writer, sheet_name=sheet, index=False)

# Elapsed wall-clock time since overall_start_time was taken.
overall_end_time = time.time()
overall_total_time = overall_end_time - overall_start_time
print(f"Total time taken: {overall_end_time - overall_start_time:.2f} seconds")

# Add the runtime as an extra sheet to the results workbook. Append mode keeps
# the sheets already in the file; an existing 'Process_Time' sheet is replaced.
process_time_df = pd.DataFrame({'Total_Process_Time_Seconds': [overall_total_time]})
with pd.ExcelWriter(
    output_path_results,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as writer:
    process_time_df.to_excel(writer, sheet_name='Process_Time', index=False)