In [None]:
pip install modAL

In [None]:
pip install git+https://github.com/modAL-python/modAL.git

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, RBF, ExpSineSquared
from modAL.models import BayesianOptimizer
from modAL.acquisition import optimizer_EI, max_EI, max_UCB, max_PI
from ipywidgets.widgets import interact,interactive
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from sklearn.preprocessing import MinMaxScaler
import warnings
# Keep cell output readable by silencing warnings.
# NOTE(review): this is a blanket filter - it hides every warning category,
# including anything raised while the models below are being fitted.
warnings.filterwarnings('ignore')

# Default styling shared by all figures that do not set their own size.
# (No plt.show() here: no figure exists yet at this point, so the call did nothing.)
plt.rcParams.update({'font.size': 12, 'figure.figsize': (15, 8)})

In [None]:
pip install seaborn

In [None]:
from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# ================================
# 1. Connect to Google Sheets
# ================================
!pip install --quiet gspread gspread_dataframe

import gspread
import pandas as pd
from google.colab import auth
from google.auth import default
from gspread_dataframe import get_as_dataframe

In [None]:
# Authenticate
# Authenticate the current Colab user through google.colab.auth.
auth.authenticate_user()
# google.auth.default() returns a pair; only its first item (the credentials) is used here.
creds, _ = default()
# gspread client built from those credentials; the next cell uses it to open the spreadsheet.
gc = gspread.authorize(creds)

In [None]:
# 👉 Replace with your Google Sheets URL or ID
SHEET_URL = ''  # insert relevant link

# Fail early with an actionable message instead of an IndexError from the parsing below.
if not SHEET_URL.strip():
    raise ValueError("SHEET_URL is empty - paste the Google Sheets URL or the spreadsheet ID above.")

# Extract the spreadsheet ID.
# A sheet URL has the form https://docs.google.com/spreadsheets/d/<ID>/edit#gid=0,
# so the ID is the path segment right after "d". A bare ID (no slashes) is used as-is.
url_parts = SHEET_URL.strip().split('/')
if 'd' in url_parts[:-1]:
    id_position = url_parts.index('d') + 1
elif len(url_parts) > 1:
    id_position = -2  # previous behaviour: second-to-last path segment
else:
    id_position = 0   # SHEET_URL is already just the ID
# Drop any query string or fragment glued to the ID (e.g. ".../d/<ID>?usp=sharing").
spreadsheet_id = url_parts[id_position].split('?')[0].split('#')[0]

spreadsheet = gc.open_by_key(spreadsheet_id)
worksheet = spreadsheet.worksheet("Sheet1")

# Convert the worksheet to a pandas DataFrame.
# usecols keeps sheet columns B-U (0-based positions 1..20); header=1 is forwarded
# to the pandas parser as the 0-based index of the row holding the column names.
data = get_as_dataframe(
    worksheet,
    usecols=list(range(1, 21)),
    header=1)


print("Data loaded from Google Sheets:")
print(data.head())

Data loaded from Google Sheets:
   Fraction_Fe  Fraction_Al  Fraction_Si  s_loc_Fe  s_scale_Fe  s_mode_Fe  \
0     0.002043     0.790271     0.207685  4.667805    2.629459   1.000000   
1     0.001644     0.793107     0.205249  4.312864    3.225134   0.999983   
2     0.003751     0.797295     0.198954  1.968683    5.804383   1.000000   
3     0.002646     0.715634     0.281720  3.578333    2.704356   1.000000   
4     0.003938     0.713604     0.282458  1.443828    5.353665   0.999999   

   s_loc_Al  s_scale_Al  s_mode_Al  s_loc_Si  s_scale_Si  s_mode_Si  \
0  4.723456   13.701967   0.397869  0.897714    5.245641   0.271674   
1  7.354702    6.462756   0.439239  0.684782    5.327519   0.338023   
2  0.642170    5.767209   7.097608  0.764883    4.997357   0.336441   
3  6.296788    4.418780   0.902652  0.474849    3.900660   0.274855   
4  6.137379    4.690521   0.881376  0.514242    3.937513   0.255552   

     angle_Fe    angle_Al    angle_Si     AR_Fe     AR_Al     AR_Si  UTS/MPa  

In [None]:
# ================================
# 2. Prepare the data
# ================================
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from matplotlib import cm
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, WhiteKernel

# Input columns of the sheet, in the order they are stacked into the feature matrix.
FEATURE_COLUMNS = [
    "Fraction_Fe", "Fraction_Al", "Fraction_Si",
    "s_loc_Fe", "s_scale_Fe", "s_mode_Fe",
    "s_loc_Al", "s_scale_Al", "s_mode_Al",
    "s_loc_Si", "s_scale_Si", "s_mode_Si",
    "angle_Fe", "angle_Al", "angle_Si",
    "AR_Fe", "AR_Al", "AR_Si",
]
# Column that is optimised. Set to "UTS/MPa" to optimise that column instead.
TARGET_COLUMN = "G N/m"

n_features = len(FEATURE_COLUMNS)

# Report every missing column at once rather than failing on the first lookup.
missing_columns = [name for name in FEATURE_COLUMNS + [TARGET_COLUMN] if name not in data.columns]
if missing_columns:
    raise KeyError(f"Columns missing from the loaded sheet: {missing_columns}")

# Blank cells arrive as NaN and would propagate into the scaled data and the
# regret metrics, so stop here with a clear message.
if data[FEATURE_COLUMNS + [TARGET_COLUMN]].isna().any().any():
    raise ValueError("The selected columns contain empty cells (NaN); remove or fill those rows first.")

# Feature matrix (n_samples, n_features) and target column vector (n_samples, 1).
X_s = data[FEATURE_COLUMNS].to_numpy()
Z = data[TARGET_COLUMN].to_numpy().reshape(-1, 1)

# Per-feature column vectors, kept under their original names for any cell that still uses them.
(X1, X2, X3, X4, X5, X6, X7, X8, X9,
 X10, X11, X12, X13, X14, X15, X16, X17, X18) = (X_s[:, [j]] for j in range(n_features))


print("Shape of X_s:", np.shape(X_s))

# Scaling: standardise features and target separately. Both scalers are kept so
# results can be mapped back to the original units later.
SS_x = StandardScaler()
SS_y = StandardScaler()

X_t = SS_x.fit_transform(X_s)
y = SS_y.fit_transform(Z)
print("Shape of y:", np.shape(y))

# Initial training set: cluster the standardised features with KMeans and, for
# every cluster centre, take the data row that lies closest to it. This gives
# a small set of starting samples spread across the feature space.
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans

n_initial_points = 10

kmeans = KMeans(n_clusters=n_initial_points, random_state=0, n_init=10)
kmeans.fit(X_t)

# One entry per centre: row index of X_t with the smallest Euclidean distance to it.
initial_indices = [
    np.argmin(np.linalg.norm(X_t - centre, axis=1))
    for centre in kmeans.cluster_centers_
]

X_initial = X_t[initial_indices]
y_initial = y[initial_indices]
print(f"Selected {n_initial_points} initial points using KMeans-based sampling.")

print(initial_indices)
print("Shape of X_initial:", np.shape(X_initial))
print("Shape of y_initial:", np.shape(y_initial))

# ================================
# 3. Define the model & optimizer
# ================================

# Surrogate kernel: a Matern term with one length scale per feature, plus a
# white-noise term.
# Alternatives kept for reference:
#   Matern(length_scale=[1.0]*n_features, length_scale_bounds=(1e-1, 10.0), nu=0.5)
#   WhiteKernel(noise_level=1)
matern_term = Matern(length_scale=[1.0] * n_features,
                     length_scale_bounds=(1e-1, 10.0),
                     nu=1.5)
noise_term = WhiteKernel(noise_level=1)
kernel = matern_term + noise_term

# Gaussian-process settings, gathered in one place.
gp_settings = {
    "alpha": 5e-10,
    "copy_X_train": False,
    "optimizer": "fmin_l_bfgs_b",
    "n_restarts_optimizer": 0,
    "normalize_y": False,
    "random_state": None,
}
regressor = GaussianProcessRegressor(kernel=kernel, **gp_settings)

# Bayesian optimiser seeded with the initial samples; the next sample is chosen
# with the max_EI query strategy.
optimizer = BayesianOptimizer(
    estimator=regressor,
    query_strategy=max_EI,
    X_training=X_initial,
    y_training=y_initial,
)

# ================================
# 4. Run Bayesian Optimization
# ================================
N_QUERIES = 50  # number of samples the optimiser may request

# History of the run: queried row indices, queried target values (original
# units) and, per feature, the queried feature values (original units).
query_list, z_ = [], []
feature_history = [[] for _ in range(n_features)]

# Candidate pool = rows of X_t the optimiser has not been trained on yet.
# Querying the full X_t every time allows the same row (including the initial
# samples) to be selected repeatedly, which spends budget without adding data.
pool_indices = np.setdiff1d(np.arange(len(X_t)), initial_indices)

for n_query in range(N_QUERIES):
    if pool_indices.size == 0:
        print(f"Candidate pool exhausted after {n_query} queries.")
        break

    # query() returns positions inside the pool it was given; map them back to rows of X_t.
    pool_position, query_inst = optimizer.query(X_t[pool_indices])
    pool_position = np.atleast_1d(pool_position)
    query_idx = pool_indices[pool_position]
    query_list.append(query_idx)

    optimizer.teach(X_t[query_idx, :].reshape(1, -1), y[query_idx].reshape(1, -1))
    pool_indices = np.delete(pool_indices, pool_position)

    # Map the queried sample back to original units once, then fan the row out
    # into the per-feature histories.
    queried_row = SS_x.inverse_transform(X_t[query_idx, :].reshape(1, -1))[0]
    z = SS_y.inverse_transform(y[query_idx][0].reshape(-1, 1)).item()

    for history, value in zip(feature_history, queried_row):
        history.append(value)
    z_.append(z)

# Per-feature histories under their original names (x_1 ... x_18).
(x_1, x_2, x_3, x_4, x_5, x_6, x_7, x_8, x_9,
 x_10, x_11, x_12, x_13, x_14, x_15, x_16, x_17, x_18) = feature_history

# Best training sample held by the optimiser, converted back to original units.
X_max_final_scaled, y_max_in_queried_final_scaled = optimizer.get_max()
X_max_pred_final = SS_x.inverse_transform(X_max_final_scaled.reshape(1, -1))
y_max_data_final = SS_y.inverse_transform(y_max_in_queried_final_scaled.reshape(1, -1))

print("Best point after", len(optimizer.y_training) - n_initial_points, "queries is:", X_max_pred_final, y_max_data_final)


Shape of X_s: (225, 18)
Shape of y: (225, 1)
Selected 10 initial points using KMeans-based sampling.
[np.int64(107), np.int64(68), np.int64(144), np.int64(223), np.int64(1), np.int64(184), np.int64(81), np.int64(19), np.int64(53), np.int64(2)]
Shape of X_initial: (10, 18)
Shape of y_initial: (10, 1)
Best point after 50 queries is: [[2.70100000e-03 7.93180000e-01 2.04119000e-01 3.72159000e+00
  4.87635500e+00 2.90585900e-06 6.42370000e+00 7.01377500e+00
  5.49965000e-01 7.80296000e-01 5.23962600e+00 3.00972000e-01
  8.92107980e+01 1.83667817e+02 1.83442550e+02 1.10007000e-01
  8.59163000e-01 7.48413000e-01]] [[415.9884]]


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# ================================
# 5. Report best result
# ================================
# Best sample currently held by the optimiser (scaled units), as a 1-row matrix.
X_max_final_scaled, y_max_in_queried_final_scaled = optimizer.get_max()
best_row_scaled = X_max_final_scaled.reshape(1, -1)

# Same sample expressed in the original feature / target units.
X_max_pred_final = SS_x.inverse_transform(best_row_scaled)
y_max_data_final = SS_y.inverse_transform(y_max_in_queried_final_scaled.reshape(1, -1))

n_queries_done = len(optimizer.y_training) - n_initial_points
print("Best point after", n_queries_done, "queries is:", X_max_pred_final, y_max_data_final)

# Predictive standard deviation of the surrogate at that sample. The model was
# trained on the standardised target, so the value comes back in standardised
# units; multiplying by the target scaler's scale factor expresses it in the
# original units (a standard deviation is rescaled but not shifted).
_, y_std_at_max_scaled = optimizer.predict(best_row_scaled, return_std=True)
y_std_at_max_original_scale = SS_y.scale_[0] * y_std_at_max_scaled.item()


print(f"Predicted standard deviation at the best point (on original scale): {y_std_at_max_original_scale:.4f}")


#ax.scatter(x1, x2, x3, z, c='k', s=50)
#ax.text(x1, x2, z, f"No. {n_query}", zdir=(0, 0, 0))

#ax.set_xlabel("X")
#ax.set_ylabel("Y")
#ax.set_zlabel(r"f(x,y)")
#ax.plot(x_1, x_2, z_, 'r-->', alpha=0.8, linewidth=2)

# optimizer.y_training holds the initial training targets followed by one entry
# per taught query, so "iteration" below counts the initial samples as well.
y_training_scaled = np.asarray(optimizer.y_training).reshape(-1, 1)

# Best value seen so far after each training sample (original units).
# np.maximum.accumulate gives the running maximum in one pass instead of
# re-scanning a growing prefix for every iteration.
best_values_at_each_iter = SS_y.inverse_transform(
    np.maximum.accumulate(y_training_scaled, axis=0)
).ravel().tolist()

# Target value of each training sample (original units).
queried_values = SS_y.inverse_transform(y_training_scaled).ravel().tolist()

# The convergence, regret and MAPE plots that follow all derive from these two lists.

print("\nBest value found and queried value at each iteration:")
for i, (best_val, queried_val) in enumerate(zip(best_values_at_each_iter, queried_values)):
    print(f"Iteration {i+1}: Best Value = {best_val:.4f}, Queried Value = {queried_val:.4f}")

plt.figure(figsize=(10, 6))
plt.plot(range(1, len(best_values_at_each_iter) + 1), best_values_at_each_iter, marker='o', linestyle='-')
plt.xlabel('Number of Queries')
plt.ylabel('Best Objective Function Value Found')
plt.title('Convergence of Bayesian Optimization (Best Value vs. Iterations)')
plt.grid(True)
plt.show()

# ================================
# 6. Report performance metrics
# ================================

# Largest target value present in the whole dataset, in original units.
max_true_value = SS_y.inverse_transform(np.reshape(y.max(), (1, 1))).item()

# Simple regret: gap between the dataset maximum and the best value found so far.
simple_regret_at_each_iter = [max_true_value - incumbent for incumbent in best_values_at_each_iter]
# Running sum of the simple regrets (not the standard definition of cumulative regret).
cumulative_regret_at_each_iter = np.cumsum(simple_regret_at_each_iter)

# Instantaneous regret: gap between the dataset maximum and each sampled value.
instantaneous_regret_at_each_iter = [max_true_value - sampled for sampled in queried_values]
# Cumulative regret in the usual sense: running sum of the instantaneous regrets.
cumul_regret_at_each_iter = np.cumsum(instantaneous_regret_at_each_iter)

# Simple regret per iteration.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(simple_regret_at_each_iter) + 1), simple_regret_at_each_iter,
        marker='o', linestyle='-')
ax.set_xlabel('Number of Queries')
ax.set_ylabel('Simple Regret')
ax.set_title('Bayesian Optimization Simple Regret vs. Iterations')
ax.grid(True)
plt.show()

# Cumulative (instantaneous-sum) regret per iteration.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(range(1, len(cumul_regret_at_each_iter) + 1), cumul_regret_at_each_iter,
        marker='o', linestyle='-')
ax.set_xlabel('Number of Queries')
ax.set_ylabel('Cumulative Regret')
ax.set_title('Bayesian Optimization Cumulative Regret vs. Iterations')
ax.grid(True)
plt.show()

# Mean Absolute Percentage Error (MAPE)
# MAPE = (1/n) * sum(|(Actual - Forecast) / Actual|) * 100
# Here "Actual" is the best value found so far and "Forecast" is the value
# sampled at that iteration.

# Defaults so that the save section further down can always read these names,
# even when nothing could be computed.
absolute_percentage_errors = []
cumulative_mean_absolute_percentage_error = np.array([])
mape = None

if len(best_values_at_each_iter) != len(queried_values):
    print("Error: The length of best_values_at_each_iter and queried_values do not match.")
else:
    # Absolute percentage error per iteration; iterations whose best value is
    # exactly 0 are skipped to avoid dividing by zero.
    absolute_percentage_errors = [
        np.abs((best - queried) / best) * 100
        for best, queried in zip(best_values_at_each_iter, queried_values)
        if best != 0
    ]

    if absolute_percentage_errors:
        # Running mean of the errors: cumulative sum divided by the number of terms so far.
        cumulative_mean_absolute_percentage_error = (
            np.cumsum(absolute_percentage_errors)
            / np.arange(1, len(absolute_percentage_errors) + 1)
        )

        mape = np.mean(absolute_percentage_errors)
        print(f"\nMean Absolute Percentage Error (MAPE): {mape:.4f}%")

        # Plot the cumulative mean absolute percentage error at each iteration
        plt.figure(figsize=(10, 6))
        plt.plot(range(1, len(cumulative_mean_absolute_percentage_error) + 1), cumulative_mean_absolute_percentage_error, marker='o', linestyle='-')
        plt.xlabel('Number of Queries')
        plt.ylabel('Cumulative Mean Absolute Percentage Error (%)')
        plt.title('Cumulative Mean Absolute Percentage Error vs. Iterations')
        plt.grid(True)
        plt.show()

    else:
        print("\nCould not calculate or plot MAPE (possibly due to best values being zero).")


from sklearn.metrics import r2_score, mean_squared_error

# Surrogate predictions for every sample (standardised units), obtained once
# together with the predictive standard deviation.
y_pred_scaled, y_std_scaled = optimizer.predict(X_t, return_std=True)

# Predictions and ground truth mapped back to the original units.
y_pred = SS_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
y_true = SS_y.inverse_transform(y).ravel()

# Fit-quality metrics, computed before plotting so they can be drawn on the figure.
r2 = r2_score(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# End points of the y = x reference line.
diagonal = [y_true.min(), y_true.max()]

# Figure 1: plain scatter of true vs. predicted values.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_true, y_pred, alpha=0.5)
ax.plot(diagonal, diagonal, 'k--', lw=2)
ax.set_xlabel('True Effective Fracture Energy N/m')
ax.set_ylabel('Predicted Effective Fracture Energy N/m')
ax.set_title('Predicted vs. True Objective Function Values')
ax.grid(True)
plt.show()

# Figure 2: same scatter coloured by the predictive standard deviation (still
# in standardised target units) and annotated with R² / RMSE. The figure is
# created explicitly so the annotation lands on it and it has the same size as
# the other plots, rather than on an implicitly created default-size figure.
fig, ax = plt.subplots(figsize=(10, 6))
ax.text(0.05, 0.95, f"R² = {r2:.3f}\nRMSE = {rmse:.3f}", transform=ax.transAxes,
        fontsize=12, verticalalignment='top', bbox=dict(boxstyle='round', facecolor='white', alpha=0.7))
uncertainty_scatter = ax.scatter(y_true, y_pred, c=np.ravel(y_std_scaled), cmap='viridis', alpha=0.7)
ax.plot(diagonal, diagonal, 'k--', lw=2)  # Add a diagonal line
fig.colorbar(uncertainty_scatter, ax=ax, label='Prediction uncertainty')
ax.set_xlabel('True Effective Fracture Energy N/m')
ax.set_ylabel('Predicted Effective Fracture Energy N/m')
ax.set_title('Predicted vs. True Objective Function Values')
ax.grid(True)
plt.show()

# ================================
# 7. Save Results to Google Drive
# ================================

# The results folder lives under /content/drive, which only points at Google
# Drive once Drive is mounted. The mount call near the top of the notebook is
# commented out, so mount here when needed; otherwise os.makedirs below would
# create an ordinary folder on the runtime's disk instead of in Drive.
# `drive` comes from `from google.colab import drive` earlier in the notebook.
DRIVE_MOUNT_POINT = '/content/drive'
if not os.path.ismount(DRIVE_MOUNT_POINT):
    drive.mount(DRIVE_MOUNT_POINT)

# Define the directory for saving results
# You can change this directory name
save_directory = '/content/drive/MyDrive/Bayesian_Optimization_Results'
os.makedirs(save_directory, exist_ok=True)
print(f"\nSaving results to: {save_directory}")

# Save best point and standard deviation.
# Each value is wrapped in a one-element list so the frame has exactly one row;
# the feature vector is therefore stored as a list inside a single CSV cell.
best_point_path = os.path.join(save_directory, 'best_point_results.csv')
best_point_data = {
    'Best X (Original Scale)': [X_max_pred_final[0].tolist()],
    'Best Y (Original Scale)': [y_max_data_final[0].tolist()],
    'Predicted Std Dev at Best Point (Original Scale)': [y_std_at_max_original_scale]
}
best_point_df = pd.DataFrame(best_point_data)
best_point_df.to_csv(best_point_path, index=False)
print(f"Best point results saved to: {best_point_path}")


# Save best value found and queried value at each iteration
iteration_values_path = os.path.join(save_directory, 'iteration_values.csv')
iteration_values_data = {
    'Iteration': range(1, len(best_values_at_each_iter) + 1),
    'Best Value Found': best_values_at_each_iter,
    'Queried Value': queried_values
}
iteration_values_df = pd.DataFrame(iteration_values_data)
iteration_values_df.to_csv(iteration_values_path, index=False)
print(f"Iteration values saved to: {iteration_values_path}")


# Save the per-iteration tables and plots.
# Every series goes through the same two steps (CSV, then PNG line plot), so
# the steps live in small helpers instead of being repeated for each series.

def _save_table(frame, filename, label):
    """Write `frame` as CSV (without index) into `save_directory`, report the path, return `frame`."""
    path = os.path.join(save_directory, filename)
    frame.to_csv(path, index=False)
    print(f"{label} data saved to: {path}")
    return frame


def _save_line_plot(x_values, y_values, ylabel, title, filename, label):
    """Save a 10x6 line plot of `y_values` over `x_values` into `save_directory` and report the path."""
    path = os.path.join(save_directory, filename)
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x_values, y_values, marker='o', linestyle='-')
    ax.set_xlabel('Number of Queries')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.grid(True)
    fig.savefig(path)
    plt.close(fig)  # Close the plot to free memory
    print(f"{label} plot saved to: {path}")


def _save_series(values, column, ylabel, title, stem, label):
    """Save `values` as `<stem>_data.csv` and `<stem>_plot.png`; return the DataFrame that was written."""
    frame = pd.DataFrame({
        'Number of Queries': range(1, len(values) + 1),
        column: values,
    })
    _save_table(frame, f'{stem}_data.csv', label)
    _save_line_plot(frame['Number of Queries'], frame[column], ylabel, title, f'{stem}_plot.png', label)
    return frame


# Convergence
convergence_df = _save_series(
    best_values_at_each_iter,
    column='Best Objective Function Value Found',
    ylabel='Best Objective Function Value Found',
    title='Convergence of Bayesian Optimization (Best Value vs. Iterations)',
    stem='convergence',
    label='Convergence')

# Simple regret
simple_regret_df = _save_series(
    simple_regret_at_each_iter,
    column='Simple Regret',
    ylabel='Simple Regret',
    title='Bayesian Optimization Simple Regret vs. Iterations',
    stem='simple_regret',
    label='Simple regret')

# Cumulative regret (based on simple regret sum)
cumulative_regret_simple_sum_df = _save_series(
    cumulative_regret_at_each_iter,
    column='Cumulative Regret (Simple Sum)',
    ylabel='Cumulative Regret (Simple Sum)',
    title='Bayesian Optimization Cumulative Regret (Simple Sum) vs. Iterations',
    stem='cumulative_regret_simple_sum',
    label='Cumulative regret (simple sum)')

# Cumulative regret (based on instantaneous regret sum)
cumulative_regret_instantaneous_df = _save_series(
    cumul_regret_at_each_iter,
    column='Cumulative Regret (Instantaneous Sum)',
    ylabel='Cumulative Regret',
    title='Bayesian Optimization Cumulative Regret vs. Iterations',
    stem='cumulative_regret_instantaneous',
    label='Cumulative regret (instantaneous sum)')


# Absolute percentage error and MAPE (only when errors could be computed)
if absolute_percentage_errors:
    absolute_percentage_error_df = _save_table(
        pd.DataFrame({
            'Number of Queries': range(1, len(absolute_percentage_errors) + 1),
            'Absolute Percentage Error (%)': absolute_percentage_errors,
        }),
        'absolute_percentage_error_data.csv',
        'Absolute percentage error')

    mape_df = _save_table(
        pd.DataFrame({'Mean Absolute Percentage Error (MAPE)': [mape]}),
        'mape_data.csv',
        'MAPE')

    _save_line_plot(
        range(1, len(absolute_percentage_error_df) + 1),
        absolute_percentage_error_df['Absolute Percentage Error (%)'],
        ylabel='Absolute Percentage Error (%)',
        title='Absolute Percentage Error vs. Iterations',
        filename='absolute_percentage_error_plot.png',
        label='Absolute percentage error')

    # Cumulative mean absolute percentage error (plot only; no CSV is written for it)
    _save_line_plot(
        range(1, len(cumulative_mean_absolute_percentage_error) + 1),
        cumulative_mean_absolute_percentage_error,
        ylabel='Cumulative Mean Absolute Percentage Error (%)',
        title='Cumulative Mean Absolute Percentage Error vs. Iterations',
        filename='cumulative_mean_absolute_percentage_error_plot.png',
        label='Cumulative Mean Absolute Percentage Error')


# Predicted vs. true objective values: CSV plus a scatter plot with a y = x reference line
predicted_vs_true_df = _save_table(
    pd.DataFrame({
        'True Effective Fracture Energy N/m': y_true,
        'Predicted Effective Fracture Energy N/m': y_pred,
    }),
    'predicted_vs_true_data.csv',
    'Predicted vs true')

predicted_vs_true_plot_path = os.path.join(save_directory, 'predicted_vs_true_plot.png')
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(predicted_vs_true_df['True Effective Fracture Energy N/m'],
           predicted_vs_true_df['Predicted Effective Fracture Energy N/m'],
           alpha=0.5)
ax.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], 'k--', lw=2)  # Add diagonal line
ax.set_xlabel('True Effective Fracture Energy N/m')
ax.set_ylabel('Predicted Effective Fracture Energy N/m')
ax.set_title('Predicted vs. True Objective Function Values')
ax.grid(True)
fig.savefig(predicted_vs_true_plot_path)
plt.close(fig)
print(f"Predicted vs true plot saved to: {predicted_vs_true_plot_path}")

print("\nAll requested data and plots saved.")