## Bayesian Optimization Process
### In this stage, we use Bayesian optimization to find the best CVD hyperparameters for dendritic ReSe2 growth.

### Load the experimental data

In [None]:
import numpy as np
import pandas as pd
import emukit
import GPy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

# Load the initial experimental runs from the 'initial' sheet of the workbook.
# Substrate encoding: c-Al2O3 is represented as 1, and MgO is represented as 0.
df_rese2 = pd.read_excel('data_new.xlsx',sheet_name='initial')
# Rename the seven columns: a 'number' column (presumably the run index),
# the five CVD inputs, and the 'Fractal' response used as the target later on.
df_rese2.columns = ['number','T_Re', 'T_Se', 'c_Re',
       'f_H2', 'Sub', 'Fractal']
# Multiply the last column ('Fractal') by 1.
# NOTE(review): presumably a numeric coercion; the substrate comment above
# refers to 'Sub', not to the last column - confirm which column was intended.
df_rese2.iloc[:,-1] = df_rese2.iloc[:,-1] *1

print(df_rese2)

In [None]:
## Set the variable space and step size of the CVD experiment.
def inclusive_grid(v_min, v_max, v_step):
    """Return the evenly spaced grid v_min, v_min + v_step, ..., v_max.

    ``np.arange(v_min, v_max + v_step, v_step)`` is fragile for non-integer
    steps: round-off in ``v_max + v_step`` can add a spurious point past
    ``v_max`` or drop ``v_max`` itself. Here the number of points is obtained
    by rounding and the grid is built from integer multiples of the step, so
    the last point is ``v_max`` (up to floating-point round-off) and integer
    inputs still produce an integer array.

    Parameters
    ----------
    v_min, v_max : int or float
        First and last grid value (both included).
    v_step : int or float
        Spacing between neighbouring grid values.

    Returns
    -------
    numpy.ndarray
        1-D array with ``round((v_max - v_min) / v_step) + 1`` entries.
    """
    n_points = int(round((v_max - v_min) / v_step)) + 1
    return v_min + v_step * np.arange(n_points)


T_Re_min, T_Re_max, T_Re_step = [580, 680, 10] ## 11 steps
T_Re_var = inclusive_grid(T_Re_min, T_Re_max, T_Re_step)
T_Re_num = len(T_Re_var)
print(T_Re_var)
print(T_Re_num)

T_Se_min, T_Se_max, T_Se_step = [220, 300, 10] ## 9 steps
T_Se_var = inclusive_grid(T_Se_min, T_Se_max, T_Se_step)
T_Se_num = len(T_Se_var)
print(T_Se_num)

c_Re_min, c_Re_max, c_Re_step = [0.025, 0.15, 0.025] ## 6 steps
c_Re_var = inclusive_grid(c_Re_min, c_Re_max, c_Re_step)
c_Re_num = len(c_Re_var)
print(c_Re_num)

f_H2_min, f_H2_max, f_H2_step = [0.01, 0.04, 0.01] ## 4 steps
f_H2_var = inclusive_grid(f_H2_min, f_H2_max, f_H2_step)
f_H2_num = len(f_H2_var)
print(f_H2_num)

Sub_min,Sub_max, Sub_step = [0, 1, 1] ## 2 steps
Sub_var = inclusive_grid(Sub_min, Sub_max, Sub_step)
Sub_num = len(Sub_var)
print(Sub_num)


# Per-variable grids and their labels, in the same order as the input columns.
var_array = [T_Re_var, T_Se_var,
             c_Re_var, f_H2_var,
             Sub_var]
x_labels = ['T_Re', 'T_Se', 'c_Re',
       'f_H2', 'Sub']

def x_normalizer(X):
    """Min-max scale every row of X onto the experimental grid's unit range.

    Entry ``i`` of each row is scaled with the smallest and largest value of
    the module-level ``var_array[i]``: the lower grid bound maps to 0 and the
    upper grid bound maps to 1.

    Parameters
    ----------
    X : iterable of indexable rows
        Points in physical units, one row per point.

    Returns
    -------
    numpy.ndarray
        The scaled points, one row per input row.
    """
    scaled_rows = []
    for row in X:
        scaled_row = []
        for col in range(len(row)):
            lower = min(var_array[col])
            upper = max(var_array[col])
            scaled_row.append((row[col] - lower) / (upper - lower))
        scaled_rows.append(scaled_row)
    return np.array(scaled_rows)

def x_denormalizer(x_norm):
    """Map min-max scaled rows back to physical units.

    Inverse of ``x_normalizer``: entry ``i`` of each row is stretched by the
    span of the module-level ``var_array[i]`` and shifted by its minimum.

    Parameters
    ----------
    x_norm : iterable of indexable rows
        Scaled points, one row per point.

    Returns
    -------
    numpy.ndarray
        The points in physical units, one row per input row.
    """
    def restore_row(row):
        restored = []
        for col, value in enumerate(row):
            lower = min(var_array[col])
            upper = max(var_array[col])
            restored.append(value * (upper - lower) + lower)
        return restored

    return np.array([restore_row(row) for row in x_norm])



def get_closest_array(suggested_x):
    """Snap every coordinate of every point to the nearest allowed grid value.

    Entry ``i`` of each row is replaced by the element of the module-level
    ``var_array[i]`` with the smallest absolute distance to it (the first such
    element when several are equally close).

    Parameters
    ----------
    suggested_x : iterable of indexable rows
        Points in physical units, one row per point.

    Returns
    -------
    numpy.ndarray
        The snapped points, one row per input row.
    """
    snapped_rows = []
    for point in suggested_x:
        snapped = []
        for col in range(len(point)):
            target = point[col]
            nearest = min(var_array[col],
                          key=lambda grid_value: abs(grid_value - target))
            snapped.append(nearest)
        snapped_rows.append(snapped)
    return np.array(snapped_rows)

In [None]:
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter

## Set the range of the parameter space after normalization
# Four continuous inputs on [0, 1], followed by the substrate as a discrete
# choice between the two values 0 and 1.
unit_axes = [ContinuousParameter(axis_name, 0, 1)
             for axis_name in ('T_Re', 'T_Se', 'c', 'H2')]
substrate_axis = DiscreteParameter('Sub', np.linspace(0, 1, 2))
parameter_space = ParameterSpace(unit_axes + [substrate_axis])
print(parameter_space)

In [None]:
from typing import Union
from emukit.core.acquisition import Acquisition
from emukit.core.interfaces import IModel, IDifferentiable
from emukit.core.loop import FixedIntervalUpdater, OuterLoop, SequentialPointCalculator
from emukit.core.loop.loop_state import create_loop_state
from emukit.core.optimization import GradientAcquisitionOptimizer
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.core.acquisition import IntegratedHyperParameterAcquisition
from emukit.bayesian_optimization.local_penalization_calculator import LocalPenalizationPointCalculator

# create a loop for building Bayesian Optimization
class ProbabilisticBayesianOptimizationLoop(OuterLoop):
    """Bayesian-optimization outer loop with optional batch suggestions.

    Wires together a model updater, a gradient-based acquisition optimizer and
    a candidate-point calculator, and starts the loop state from the data
    already held by the model (``model_objective.X`` / ``model_objective.Y``).

    Parameters
    ----------
    space : ParameterSpace
        Search space handed to the acquisition optimizer.
    model_objective : IModel / IDifferentiable
        Surrogate model of the objective.
    acquisition : Acquisition, optional
        Acquisition function; Expected Improvement on ``model_objective`` is
        used when omitted.
    update_interval : int
        Interval passed to ``FixedIntervalUpdater``.
    batch_size : int
        1 selects ``SequentialPointCalculator``; any other value selects
        ``LocalPenalizationPointCalculator`` with that batch size.
    """

    def __init__(self, space: ParameterSpace, model_objective: Union[IModel, IDifferentiable],
                 acquisition: Acquisition = None,
                 update_interval: int = 1, batch_size: int = 1):
        self.model_objective = model_objective

        # Fall back to Expected Improvement when no acquisition is supplied.
        chosen_acquisition = (ExpectedImprovement(model_objective)
                              if acquisition is None else acquisition)

        updater = FixedIntervalUpdater(model_objective, update_interval)
        optimizer = GradientAcquisitionOptimizer(space)

        if batch_size != 1:
            calculator = LocalPenalizationPointCalculator(
                chosen_acquisition, optimizer, model_objective, space, batch_size)
        else:
            calculator = SequentialPointCalculator(chosen_acquisition, optimizer)

        initial_state = create_loop_state(model_objective.X, model_objective.Y)

        super().__init__(calculator, [updater], initial_state)


### Run GP Regression on the collected data

In [None]:
# Seed NumPy's global random generator for reproducible random draws in the
# cells that follow. (Writing `np.random.seed = 20` would only rebind the
# function to an int and leave the generator unseeded.)
np.random.seed(20)

from GPy.models import GPRegression
from emukit.model_wrappers import GPyModelWrapper
def y_normalizer(y_2d):
    """Shift the response down by 1 (inverse of ``y_denormalizer``).

    Works element-wise on scalars and arrays alike.
    """
    return y_2d - 1

def y_denormalizer(y_normalized):
    """Shift a normalized response back up by 1 (inverse of ``y_normalizer``).

    Works element-wise on scalars and arrays alike.
    """
    return y_normalized + 1
# Training inputs: columns 1-5 of the data table, min-max scaled.
x_init = x_normalizer(df_rese2.iloc[:,1:6].values)
# Training target: the last column, turned into an (n, 1) column vector and shifted by -1.
y_init = y_normalizer(np.transpose([df_rese2.iloc[:,-1].values]))
X, Y = [x_init, y_init]
print(X)
print(Y)


# Number of input dimensions = length of one input row.
input_dim = len(X[0])
print(input_dim)
# Matern 3/2 kernel with ARD enabled; lengthscale and variance are bounded below.
ker = GPy.kern.Matern32(input_dim = input_dim, ARD =True)#
ker.lengthscale.constrain_bounded(1e-1, 10)
ker.variance.constrain_bounded(1, 1000.0)

#ker += GPy.kern.Bias(input_dim = input_dim)
# The model is trained on -Y, so every prediction must be negated again
# before it is interpreted in the original sense.
model_gpy = GPRegression(X , -Y, ker)#Emukit is a minimization tool; need to make Y negative
# Fix the Gaussian noise variance at 0.05**2 so it is not optimized.
model_gpy.Gaussian_noise.variance = 0.05**2
model_gpy.Gaussian_noise.variance.fix()
# Randomize the starting parameters, then optimize with 20 restarts.
model_gpy.randomize()
model_gpy.optimize_restarts(num_restarts=20,verbose =False, messages=False)
# Wrap the GPy model for use with emukit.
objective_model = GPyModelWrapper(model_gpy)



##### Check the performance of the regression model

In [None]:
# Shortcut to the wrapped GPy model's predict method.
f_obj = objective_model.model.predict

# Predict on the training inputs. The model was fitted on -Y, so the returned
# values are negated; the second output is square-rooted before use as error bars
# (presumably it is the predictive variance - confirm against GPy's predict docs).
y_pred, y_uncer = f_obj(X)
y_pred = -y_pred[:, -1]
y_uncer = np.sqrt(y_uncer[:, -1])
# Undo the -1 shift applied by y_normalizer.
y_pred_unscaled = y_denormalizer(y_pred)

print(y_pred_unscaled)
print(y_uncer)

from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Parity plot: measured values on x, predicted values (with error bars) on y.
fig, ax = plt.subplots(figsize=(5, 3.5))  
fs = 18
# End points of the dashed y = x reference line.
lims1 = (1, 2)


ax.scatter(y_denormalizer(Y[:, -1]), y_pred_unscaled, alpha=0.5, 
           c=(112/255, 161/255, 255/255), edgecolor='navy')
ax.errorbar(y_denormalizer(Y[:, -1]), y_pred_unscaled, yerr=y_uncer, 
            ms=0, ls='', capsize=2, alpha=0.6, color='gray', zorder=0)
ax.plot(lims1, lims1, 'k--', alpha=0.75, zorder=0)

# RMSE on the training data in the shifted (y - 1) units.
# NOTE(review): rmse_value is computed here but not shown on the plot or printed in this cell.
rmse_value = np.sqrt(mean_squared_error(Y[:, -1], y_pred))

ax.set_xlabel('True Values', fontsize=fs)
ax.set_ylabel('Predicted Values', fontsize=fs)
ax.tick_params(labelsize=fs-2)
ax.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# One candidate recipe in physical units, column order T_Re, T_Se, c_Re, f_H2, Sub.
new_params = np.array([[620, 250, 0.075, 0.02, 1]])

# Query the surrogate at the min-max scaled recipe.
new_params_scaled = x_normalizer(new_params)
new_pred, new_uncer = objective_model.model.predict(new_params_scaled)

# Square-root the second model output for reporting.
new_uncer = np.sqrt(new_uncer[:, -1])

# The model was trained on the negated target: flip the sign back, then undo
# the -1 shift.
new_pred = -new_pred[:, -1]
new_pred_unscaled = y_denormalizer(new_pred)

print("Prediction (after denormalization):", new_pred_unscaled)
print("Uncertainty Estimate:", new_uncer)

In [None]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from scipy.stats import spearmanr

# Goodness of fit of the surrogate on its own training data, evaluated in the
# shifted (y - 1) units used for training.
y_true = Y[:, -1]

mse = mean_squared_error
mse_all = mse(y_true, y_pred)
rsquared_all = r2_score(y_true, y_pred)
sprman_all = spearmanr(y_true, y_pred)

print('all RMSE: %.4f' % np.sqrt(mse_all))
print('all R^2: %.4f' % rsquared_all)
print('all spearman: %.4f' % sprman_all[0])

### Start the First Run of the Batch-mode Bayesian Optimization 

In [None]:
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement, \
                                                      NegativeLowerConfidenceBound, \
                                                      MaxValueEntropySearch, \
                                                      MultipointExpectedImprovement,\
                                                      ProbabilityOfFeasibility, \
                                                      ProbabilityOfImprovement
# Alternative acquisition functions, kept for reference (uncomment one to use it):
# Expected Improvement (EI)
# acquisition = ExpectedImprovement(objective_model, jitter=0.2)
## Lower Confidence Bound (LCB)
# acquisition = NegativeLowerConfidenceBound(objective_model, beta = 10)
## Probability of Improvement (PI)
# acquisition = ProbabilityOfImprovement(objective_model,jitter=0)


# Acquisition actually used: MaxValueEntropySearch (MES), wrapped in
# IntegratedHyperParameterAcquisition together with the surrogate model.
acquisition_generator = lambda m: MaxValueEntropySearch(m, parameter_space)
acquisition_integrated = IntegratedHyperParameterAcquisition(objective_model, acquisition_generator)
acquisition = acquisition_integrated

# Create a Bayesian optimization loop and collect new sample points
bayesopt_cons_pr = ProbabilisticBayesianOptimizationLoop(
    model_objective=objective_model,
    space=parameter_space,
    acquisition=acquisition,
    batch_size=15  # batchsize>10 to account for duplication
)

# Ask the candidate-point calculator for the next batch (normalized units),
# then convert the batch to physical units.
X_new = bayesopt_cons_pr.candidate_point_calculator.compute_next_points(bayesopt_cons_pr.loop_state)
X_new1= x_denormalizer(X_new)

f_obj = objective_model.model.predict

# Model prediction at the raw (un-snapped) suggestions; the sign is flipped back
# because the model was trained on -Y. The values stay in the shifted (y - 1) units.
y_pred_new, y_uncer_new = f_obj(X_new)
y_pred_new = -y_pred_new
y_uncer_new = np.sqrt(y_uncer_new)
print(y_pred_new)


# Snap the suggestions to the allowed experimental grid and label the columns
# like the input columns of the data table.
df_Xnew = pd.DataFrame(get_closest_array(X_new1), columns=df_rese2.columns[1:6])
# NOTE(review): df_all / df_all_ (existing + suggested inputs, de-duplicated) are
# not used again in this cell, so duplicates are not removed from df_Xnew here -
# confirm whether filtering df_Xnew was intended.
df_all = pd.concat([df_rese2.iloc[:, 1:6], df_Xnew])
df_all_ = df_all.drop_duplicates()
# Sort the suggestions and number them continuing after the existing runs.
df_Xnew = df_Xnew.sort_values(by=list(df_rese2.columns[1:6]), ignore_index = True)
df_Xnew.index = np.arange(len(df_Xnew))+len(df_rese2)
# Bare expression: displayed as the cell output when run in a notebook.
df_Xnew.iloc[:,:]



In [None]:
# Pull the suggested recipes out of the table as a plain array and snap them
# to the allowed grid values.
df_Xnew_array = df_Xnew.values
result_array = get_closest_array(df_Xnew_array)

print(result_array)


In [None]:
# Evaluate the surrogate and the acquisition at the snapped suggestions and
# save everything to an Excel file.
import pandas as pd

# Surrogate model: predict at the normalized, grid-snapped suggestions; the sign
# is flipped back because the model was trained on -Y.
y_pred_new, y_uncer_new = f_obj(x_normalizer(result_array))
y_pred_new = -y_pred_new
y_uncer_new = np.sqrt(y_uncer_new)
# Acquisition function: evaluated at the same normalized points.
f_acq = bayesopt_cons_pr.candidate_point_calculator.acquisition.evaluate 
acq_produc = f_acq(x_normalizer(result_array)) 
print(y_denormalizer(y_pred_new))
print(acq_produc)


# One generic column 'X_new_<i>' per input, followed by the de-shifted
# prediction, its uncertainty and the acquisition value.
X_new_columns = [f'X_new_{i}' for i in range(df_Xnew.shape[1])]
X_new_data = {col: result_array[:, i] for i, col in enumerate(X_new_columns)}
df_X_new = pd.DataFrame(X_new_data)
df_X_new['y_pred_new'] =y_denormalizer(y_pred_new) 
df_X_new['y_uncer_new'] = y_uncer_new
df_X_new['acq_produc'] = acq_produc


# Write the table (without the index) to the output workbook.
output_filename = "Next_round_suggestion.xlsx"
df_X_new.to_excel(output_filename, index=False)
print(f"The data has been successfully saved to an Excel file {output_filename}")



#### Visualisation of Optimization Process

In [None]:
from emukit.core.initial_designs.random_design import RandomDesign
# Seed NumPy's global random generator (same state as seeding with 30) so the
# random draws that follow are repeatable; presumably the random design below
# samples from this global generator. Writing `np.random.seed = 30` would only
# rebind the function to an int. The state is restored through set_state
# rather than by calling np.random.seed, so this line also works in a session
# where the `np.random.seed` attribute has been rebound to a plain value.
np.random.set_state(np.random.RandomState(30).get_state())
# Draw 200 random points from the normalized parameter space and keep a copy
# in physical units, labelled with the five input column names.
design = RandomDesign(parameter_space)
x_sampled = design.get_samples(200)
x_columns = df_rese2.columns[1:6]
x_sampled_df = pd.DataFrame(x_denormalizer(x_sampled), columns=x_columns)


# For every unordered pair of input dimensions (ind1 < ind2), draw two rows of
# contour maps: the model prediction and the model uncertainty. Each map
# summarizes (max / mean / min) over the 200 random samples after the two chosen
# dimensions have been pinned to a grid node.
# NOTE(review): this loop rebinds y_pred, y_uncer, x1, x2 and fs, which are
# also used by earlier cells - re-run those cells before reusing the names.
for i in range(input_dim):
    for j in range(input_dim-i-1):
        
        ## Generate a 2D grid for Contour plot
        ind1 = i
        ind2 = j+i+1
        n_steps =21
        x1x2y_pred, x1x2y_uncer =[[],[]]
        for x1 in np.linspace(0, 1, n_steps):
            for x2 in np.linspace(0, 1, n_steps):
                # Pin the two plotted dimensions of every random sample to (x1, x2).
                x_temp = np.copy(x_sampled)
                x_temp[:,ind1] = x1
                x_temp[:,ind2] = x2
                y_pred, y_uncer = f_obj(x_temp)
                # Undo the sign flip and the -1 shift (same arithmetic as y_denormalizer).
                y_pred = -y_pred+1

                # Physical-unit coordinates of this grid node (identical for all rows).
                x1_org = x_denormalizer(x_temp)[0,ind1]
                x2_org = x_denormalizer(x_temp)[0,ind2]
                x1x2y_pred.append([x1_org, x2_org, np.max(y_pred), np.mean(y_pred), np.min(y_pred)])
                x1x2y_uncer.append([x1_org, x2_org, np.max(np.sqrt(y_uncer)), np.mean(np.sqrt(y_uncer)), np.min(np.sqrt(y_uncer))])
        
        # Reshape the flat node lists into (n_steps, n_steps) grids for contourf.
        x1 = np.array(x1x2y_pred, dtype=object)[:,0].reshape(n_steps, n_steps)
        x2 = np.array(x1x2y_pred, dtype=object)[:,1].reshape(n_steps, n_steps)
            
        y_pred_max = np.array(x1x2y_pred, dtype=object)[:,2].reshape(n_steps, n_steps).astype(float)
        y_pred_mean = np.array(x1x2y_pred, dtype=object)[:,3].reshape(n_steps, n_steps).astype(float)
        y_pred_min = np.array(x1x2y_pred, dtype=object)[:,4].reshape(n_steps, n_steps).astype(float)
        
        y_uncer_max = np.array(x1x2y_uncer, dtype=object)[:,2].reshape(n_steps, n_steps).astype(float)
        y_uncer_mean = np.array(x1x2y_uncer, dtype=object)[:,3].reshape(n_steps, n_steps).astype(float)
        y_uncer_min = np.array(x1x2y_uncer, dtype=object)[:,4].reshape(n_steps, n_steps).astype(float)

        fs = 20
        title_pad = 16
        
        ## Contour for Prediction fractal dimension
        fig,axes = plt.subplots(1, 3, figsize=(17, 3.5), sharey = False, sharex = False)
        # NOTE(review): c_offset is unpacked in the loop below but never used.
        colorbar_offset = [12.5, 7, 4]
        for ax, c_offset, y in zip(axes, colorbar_offset,
                                   [y_pred_max, y_pred_mean, y_pred_min]):
            
            # Contour levels run from 1.0 to 1.95 in steps of 0.05.
            c_plt1 = ax.contourf(x1, x2, y, levels = np.arange(20)*0.05+1, cmap='plasma', extend = 'both')
            cbar = fig.colorbar(c_plt1, ax= ax)
            cbar.ax.tick_params(labelsize=fs*0.8)
            # Gray markers: existing training points.
            ax.scatter(x_denormalizer(X)[:, ind1], 
                       x_denormalizer(X)[:, ind2], 
                       s = 50, facecolors='gray', alpha = 0.5, edgecolor = 'gray')
            # Green markers: newly suggested points (raw suggestions, not grid-snapped).
            ax.scatter(x_denormalizer(X_new)[:, ind1], 
                      x_denormalizer(X_new)[:, ind2], 
                       s = 80, facecolors='green', alpha = 0.9, edgecolor = 'green')
            
            ax.set_xlabel(str(x_columns[ind1]),fontsize =  fs)
            ax.set_ylabel(str(x_columns[ind2]),fontsize =  fs)

            # Pad the axis limits by 2% of the plotted range on each side.
            x1_delta = (np.max(x1)-np.min(x1))*0.02
            x2_delta = (np.max(x2)-np.min(x2))*0.02
            ax.set_xlim(np.min(x1)-x1_delta, np.max(x1)+x1_delta)
            ax.set_ylim(np.min(x2)-x2_delta, np.max(x2)+x2_delta)
            ax.tick_params(direction='in', length=5, width=1, labelsize = fs*.8)#, grid_alpha = 0.5
            # Hand-picked tick positions for specific axes; other axes keep matplotlib's defaults.
            if ind1==0:#T_Re
                ax.set_xticks([580, 615, 645, 680])
            if ind1==1:#T_Se
                ax.set_xticks([220, 240, 260, 280])
            if ind1==2:#c_Re
                ax.set_xticks([0.05, 0.1, 0.15])
            if ind2==4:#sub.
                ax.set_yticks([0, 1])
            #ax.grid(True, linestyle='-.')

        axes[0].set_title('objective fcn max', pad = title_pad,fontsize =  fs)
        axes[1].set_title('objective fcn mean', pad = title_pad,fontsize =  fs)
        axes[2].set_title('objective fcn min', pad = title_pad,fontsize =  fs)

        plt.subplots_adjust(wspace = 0.3)
        plt.show()
        ## Contour for Uncertainty
        fig,axes = plt.subplots(1, 3, figsize=(17, 3.5), sharey = False, sharex = False)
        # NOTE(review): c_offset is unpacked in the loop below but never used.
        colorbar_offset = [3, 2.5, 2]
        for ax, c_offset, y in zip(axes, colorbar_offset,
                                   [y_uncer_max, y_uncer_mean, y_uncer_min]):

            # Contour levels run from 0 to 0.19 in steps of 0.01.
            c_plt1 = ax.contourf(x1, x2, y,  levels = 0+np.arange(20)*0.01, cmap='plasma', extend = 'both')
            cbar = fig.colorbar(c_plt1, ax= ax)
            cbar.ax.tick_params(labelsize=fs*0.8)
            # Gray markers: existing training points; green markers: new suggestions.
            ax.scatter(x_denormalizer(X)[:, ind1], 
                       x_denormalizer(X)[:, ind2], 
                       s = 50, facecolors='gray', alpha = 0.9, edgecolor = 'gray')
            ax.scatter(x_denormalizer(X_new)[:, ind1], 
                       x_denormalizer(X_new)[:, ind2], 
                       s = 80, facecolors='green', alpha = 0.9, edgecolor = 'green')
            ax.set_xlabel(str(x_columns[ind1]),fontsize =  fs)
            ax.set_ylabel(str(x_columns[ind2]),fontsize =  fs)

            # Pad the axis limits by 2% of the plotted range on each side.
            x1_delta = (np.max(x1)-np.min(x1))*0.02
            x2_delta = (np.max(x2)-np.min(x2))*0.02
            ax.set_xlim(np.min(x1)-x1_delta, np.max(x1)+x1_delta)
            ax.set_ylim(np.min(x2)-x2_delta, np.max(x2)+x2_delta)
            ax.tick_params(direction='in', length=5, width=1, labelsize = fs*.8)#, grid_alpha = 0.5
            # Same hand-picked tick positions as in the prediction plots.
            if ind1==0:#T_Re
                ax.set_xticks([580, 615, 645, 680])
            if ind1==1:#T_Se
                ax.set_xticks([220, 240, 260, 280])
            if ind1==2:#c_Re
                ax.set_xticks([0.05, 0.1, 0.15])
            if ind2==4:#sub.
                ax.set_yticks([0, 1])


        axes[0].set_title('objective uncer max', pad = title_pad,fontsize =  fs)
        axes[1].set_title('objective uncer mean', pad = title_pad,fontsize =  fs)
        axes[2].set_title('objective uncer min', pad = title_pad,fontsize =  fs)
        plt.subplots_adjust(wspace = 0.3)
        plt.show()