In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
plt.rcParams['font.sans-serif'] = ["Arial"]

In [None]:
# Load the experimental records from sheet 'model_3' of the workbook.
pb_data = pd.read_excel('TEP_3.xlsx', sheet_name='model_3')

# Give the columns their canonical names.  The previous call to
# `pb_data.set_axis(..., axis=1)` returned a renamed copy that was thrown away,
# so the frame silently kept whatever headers the spreadsheet contained.
# Assigning to `.columns` applies the names (and still raises on a length
# mismatch, exactly as `set_axis` did).
pb_data.columns = [
    'Number',
    'TEP(ul)',
    'NMP(ul)',
    '2-Me(ul)',
    'Temp(℃)',
    'MACl(%)',
    'conc(mmol/1000ul)',
    'NMP ratio(%)',
    'The highest efficiency',
]

pb_data

In [None]:
# Map each raw spreadsheet column that the model uses (five process variables
# followed by the measured efficiency) to the label shown on the figures.
column_labels = {
    'TEP(ul)': 'TEP (μL)',
    'NMP(ul)': 'NMP (μL)',
    '2-Me(ul)': '2-Me (μL)',
    'Temp(℃)': 'Temp (°C)',
    'MACl(%)': 'MACl (%)',
    'The highest efficiency': 'Efficiency (%)',
}

# Select the modelling columns in that order, then apply the display labels.
df = pb_data[list(column_labels.keys())]
df.columns = list(column_labels.values())
df

In [None]:
import emukit
import GPy
from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs.random_design import RandomDesign
from emukit.core.initial_designs.latin_design import LatinDesign

In [None]:
def _inclusive_grid(lower, upper, step):
    """Return the values lower, lower+step, ... up to (and including) upper.

    A tenth of a step is added to the stop value so that `upper` itself is
    kept when it lies exactly on the grid.
    """
    return np.arange(lower, upper + step * 0.1, step)


# TEP volume, unit: uL
TEP_min, TEP_max, TEP_step = 500, 651, 10
TEP_var = _inclusive_grid(TEP_min, TEP_max, TEP_step)
TEP_num = TEP_var.shape[0]

# NMP volume, unit: uL
NMP_min, NMP_max, NMP_step = 30, 81, 5
NMP_var = _inclusive_grid(NMP_min, NMP_max, NMP_step)
NMP_num = NMP_var.shape[0]

# 2-Me volume, unit: uL
TwoMe_min, TwoMe_max, TwoMe_step = 0, 50, 5
TwoMe_var = _inclusive_grid(TwoMe_min, TwoMe_max, TwoMe_step)
TwoMe_num = TwoMe_var.shape[0]

# Temperature, unit: degree C
Temp_min, Temp_max, Temp_step = 120, 150, 5
Temp_var = _inclusive_grid(Temp_min, Temp_max, Temp_step)
Temp_num = Temp_var.shape[0]

# MACl, unit: %
MACl_min, MACl_max, MACl_step = 10, 40, 5
MACl_var = _inclusive_grid(MACl_min, MACl_max, MACl_step)
MACl_num = MACl_var.shape[0]

# Grids in model-input column order, with the matching axis labels.
var_array = [TEP_var, NMP_var, TwoMe_var, Temp_var, MACl_var]
x_labels = [
    'TEP (μL)',
    'NMP (μL)',
    '2-Me (μL)',
    'Temp (°C)',
    'MACl (%)',
]

In [None]:
# Full factorial design: one row per combination of the five variable grids.
# With indexing='ij' and a C-order reshape the last variable (MACl) varies
# fastest and the first (TEP) slowest, i.e. the same row order as nesting the
# loops TEP -> NMP -> 2-Me -> Temp -> MACl.
_grid_axes = np.meshgrid(TEP_var, NMP_var, TwoMe_var, Temp_var, MACl_var, indexing='ij')
X_all_grid = np.stack(_grid_axes, axis=-1).reshape(-1, len(_grid_axes))
X_all_grid.shape

In [None]:
def x_normalizer(X, var_array = var_array):
    """Min-max scale every row of ``X`` with the extremes of ``var_array``.

    Parameters
    ----------
    X : iterable of row sequences
        Element ``i`` of each row is scaled with the minimum and maximum of
        ``var_array[i]``; rows may be shorter than ``var_array``.
    var_array : sequence of iterables
        Allowed values of each variable.  Defaults to the module-level
        ``var_array`` as it exists when this function is defined.

    Returns
    -------
    numpy.ndarray
        Array built from the scaled rows, ``(x - min) / (max - min)``.

    Notes
    -----
    A variable whose grid holds a single distinct value has ``max == min`` and
    therefore divides by zero.
    """
    # The bounds depend only on var_array, so compute them once up front
    # instead of once per element of every row.
    lower = [min(values) for values in var_array]
    upper = [max(values) for values in var_array]

    return np.array([
        [(row[i] - lower[i]) / (upper[i] - lower[i]) for i in range(len(row))]
        for row in X
    ])
def x_denormalizer(x_norm, var_array = var_array):
    """Map min-max scaled rows back to the original variable units.

    Parameters
    ----------
    x_norm : iterable of row sequences
        Element ``i`` of each row is rescaled with the minimum and maximum of
        ``var_array[i]``; rows may be shorter than ``var_array``.
    var_array : sequence of iterables
        Allowed values of each variable.  Defaults to the module-level
        ``var_array`` as it exists when this function is defined.

    Returns
    -------
    numpy.ndarray
        Array built from the rescaled rows, ``x * (max - min) + min``.
    """
    # The bounds depend only on var_array, so compute them once up front
    # instead of once per element of every row.
    lower = [min(values) for values in var_array]
    upper = [max(values) for values in var_array]

    return np.array([
        [row[i] * (upper[i] - lower[i]) + lower[i] for i in range(len(row))]
        for row in x_norm
    ])


def get_closest_array(suggested_x, var_list=None):
    """Snap every entry of ``suggested_x`` to the nearest allowed grid value.

    Parameters
    ----------
    suggested_x : iterable of row sequences
        Element ``i`` of each row is replaced by the member of ``var_list[i]``
        with the smallest absolute difference to it.
    var_list : sequence of iterables, optional
        Allowed values of each variable.  When omitted, the module-level
        ``var_array`` is looked up at call time (the previous behaviour).

    Returns
    -------
    numpy.ndarray
        Array built from the snapped rows.  When two grid values are equally
        close, the one that appears first in the grid is chosen.
    """
    if var_list is None:
        var_list = var_array

    def get_closest_value(given_value, array_list):
        # `min` with a key returns the first element attaining the minimum.
        return min(array_list, key=lambda list_value: abs(list_value - given_value))

    return np.array([
        [get_closest_value(row[i], var_list[i]) for i in range(len(row))]
        for row in suggested_x
    ])

In [None]:
# Search space in normalised coordinates.  Every variable covers [0, 1]
# widened on each side by 1/(num-1)/2, i.e. half of the spacing between
# adjacent grid levels after normalisation.
_levels_per_variable = [
    ('TEP', TEP_num),
    ('NMP', NMP_num),
    ('TwoMe', TwoMe_num),
    ('Temp', Temp_num),
    ('MACl', MACl_num),
]

parameter_space = ParameterSpace([
    ContinuousParameter(name, 0 - 1/(count - 1)/2, 1 + 1/(count - 1)/2)
    for name, count in _levels_per_variable
])

In [None]:
import numpy as np
# Re-create the legacy NumPy scalar aliases (removed from recent NumPy
# releases) before the model is built -- presumably because the installed GPy
# still refers to them.
np.float = float
np.int = int
np.bool = bool
np.object = object
np.str = str
from GPy.models import GPRegression
from emukit.model_wrappers import GPyModelWrapper

# `randomize()` and `optimize_restarts()` start from random hyper-parameters.
# Without a seed every Restart & Run All yields a different fitted model (and
# different downstream figures).  GPy's default generator draws from NumPy's
# global RNG, so seeding it makes the fit reproducible.
np.random.seed(0)

# Training data: the five normalised process variables and the efficiency as
# an (n, 1) column.
x_exp = x_normalizer(df.iloc[:,0:5].values)
y_exp = np.transpose(([df.iloc[:,-1].values]))
X, Y = [x_exp, y_exp]

# Matern-5/2 kernel with one length scale per input (ARD), both
# hyper-parameters box-constrained.
input_dim = 5
ker = GPy.kern.Matern52(input_dim = input_dim, ARD = True)
ker.lengthscale.constrain_bounded(0.01, 5)
ker.variance.constrain_bounded(1e-2, 10000.0)

# The model is fitted to -Y; predictions are negated again wherever they are
# used.  The observation-noise variance is fixed at 0.25 and not optimised.
model_gpy = GPRegression(X , -Y, ker)
model_gpy.Gaussian_noise.variance =0.25
model_gpy.Gaussian_noise.variance.fix()
model_gpy.randomize()
model_gpy.optimize_restarts(num_restarts=30,verbose =False, messages=False)

objective_model = GPyModelWrapper(model_gpy)
print(objective_model.model.kern.lengthscale)
print(objective_model.model.kern.variance)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import spearmanr

# ---- Model predictions at the training inputs ------------------------------
# The sign of the mean is flipped back because the model is fitted to -Y; the
# returned variance is turned into a standard deviation.
f_obj = objective_model.model.predict
y_pred, y_uncer = f_obj(X)
y_pred = -y_pred[:, -1]
y_uncer = np.sqrt(y_uncer[:, -1])
y_true = Y[:, -1]

# ---- Parity plot (left panel only; both axes are divided by 20) -------------
fs = 20
lims1 = (0, 1.2)
fig, axes = plt.subplots(1, 3, figsize=(5.5 * 3, 4.5))
parity_ax = axes[0]

parity_ax.scatter(y_true/20, y_pred/20, alpha=0.5, c='navy', edgecolor='navy')
parity_ax.errorbar(
    y_true/20, y_pred/20, yerr=y_uncer/20,
    ms=0, ls='', capsize=2, alpha=0.6, color='gray', zorder=0,
)
# Dashed diagonal marks perfect agreement.
parity_ax.plot(lims1, lims1, 'k--', alpha=0.75, zorder=0)

# ---- Goodness-of-fit metrics on the un-normalised values --------------------
rmse_value = np.sqrt(mean_squared_error(y_true, y_pred))
mae_value = mean_absolute_error(y_true, y_pred)
spearman_value = spearmanr(y_true, y_pred)[0]
rsquared_value = r2_score(y_true, y_pred)

print('MAE:', np.round(mae_value, 4), ' ',
      'RMSE:', np.round(rmse_value, 4), ' ',
      'spearman:', np.round(spearman_value, 4), ' ',
      'R² score:', np.round(rsquared_value, 4))

# ---- Labels and styling ------------------------------------------------------
title = 'GPR' + " (MAE=%.2f" % mae_value + ' [%])'
label_style = dict(fontproperties='Arial', fontsize=24, labelpad=10)
parity_ax.set_xlabel('Truth Normalized PCE (a.u.)', **label_style)
parity_ax.set_ylabel('Prediction Normalized PCE (a.u.)', **label_style)
parity_ax.set_title(title, fontsize=fs, fontproperties='Arial', pad=20)
parity_ax.tick_params(axis='both', direction='in', length=5, width=1, labelsize=20, pad=10)

# The two remaining panels are left blank.
for blank_ax in axes[1:]:
    blank_ax.axis("off")

for ax in axes:
    ax.tick_params(direction='in', length=5, width=1, labelsize=20, grid_alpha=0.5)
    ax.grid(True, linestyle='-.')

plt.subplots_adjust(wspace=0.4)

plt.savefig("TEP_model3.png", dpi=600, bbox_inches='tight')

plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# 200 random points of the normalised search space.  For every pair of
# variables the pair is pinned to a grid value in all 200 rows, and the
# max / mean / min of the prediction over those rows is plotted.
design = RandomDesign(parameter_space)
x_sampled = design.get_samples(200)
x_columns = df.columns[:5]

# Measured points in original units; identical for every panel, so computed once.
x_exp_org = x_denormalizer(X)

# Fixed x-axis tick positions, keyed by the index of the variable on the x-axis.
xticks_by_variable = {
    0: [500, 550, 600, 650],
    1: [30, 40, 50, 60, 70, 80],
    2: [0, 10, 20, 30, 40, 50],
    3: [120, 130, 140, 150],
    4: [10, 20, 30, 40],
}

for i in range(input_dim):
    for j in range(input_dim - i - 1):

        # 2D grid for Contour plot
        ind1 = i
        ind2 = j + i + 1
        n_steps = 21
        x1x2y_pred, x1x2y_uncer = [[], []]

        for x1_norm in np.linspace(0, 1, n_steps):
            for x2_norm in np.linspace(0, 1, n_steps):
                x_temp = np.copy(x_sampled)
                x_temp[:, ind1] = x1_norm
                x_temp[:, ind2] = x2_norm
                y_pred, y_uncer = f_obj(x_temp)

                # Columns ind1/ind2 are constant over all rows, so converting a
                # single row gives the grid coordinates in original units.
                x_org = x_denormalizer(x_temp[:1])
                x1_org = x_org[0, ind1]
                x2_org = x_org[0, ind2]

                # The model is fitted to -Y: flip the sign back.
                y_pred = -y_pred
                # Standard deviation on the same /20 scale as the prediction:
                # sqrt(variance) / 20 (previously sqrt(variance / 20), which is
                # inconsistent with the parity plot's sqrt(variance) / 20).
                y_std = np.sqrt(y_uncer) / 20

                x1x2y_pred.append([x1_org, x2_org, np.max(y_pred / 20), np.mean(y_pred / 20), np.min(y_pred / 20)])
                x1x2y_uncer.append([x1_org, x2_org, np.max(y_std), np.mean(y_std), np.min(y_std)])

        # Every entry is a scalar, so plain float arrays are sufficient.
        pred_table = np.array(x1x2y_pred, dtype=float)
        uncer_table = np.array(x1x2y_uncer, dtype=float)

        x1 = pred_table[:, 0].reshape(n_steps, n_steps)
        x2 = pred_table[:, 1].reshape(n_steps, n_steps)
        y_pred_max = pred_table[:, 2].reshape(n_steps, n_steps)
        y_pred_mean = pred_table[:, 3].reshape(n_steps, n_steps)
        y_pred_min = pred_table[:, 4].reshape(n_steps, n_steps)

        y_uncer_max = uncer_table[:, 2].reshape(n_steps, n_steps)
        y_uncer_mean = uncer_table[:, 3].reshape(n_steps, n_steps)
        y_uncer_min = uncer_table[:, 4].reshape(n_steps, n_steps)

        fs = 18
        title_pad = 16

        # Contour for Prediction Efficiency Mean
        fig, axes = plt.subplots(1, 3, figsize=(17, 4), sharey=False, sharex=False)
        # Lowest contour level of each panel on the /20 scale.  The third entry
        # used to be a bare 4, which put every level of the "min" panel far
        # above the plotted data.
        colorbar_offset = [20/20, 12/20, 4/20]
        for ax, c_offset, y in zip(axes, colorbar_offset, [y_pred_max, y_pred_mean, y_pred_min]):
            # 18 levels spaced 0.01 apart, starting at the panel's offset.
            c_plt1 = ax.contourf(
                x1, x2, y,
                levels=np.arange(18)*0.1/10 + c_offset,
                cmap='plasma', extend='both'
            )
            cbar = fig.colorbar(c_plt1, ax=ax)
            cbar.ax.tick_params(labelsize=fs*0.8,)
            cbar.ax.set_yticklabels(cbar.ax.get_yticklabels(), weight='bold')

            # Overlay the measured points.
            ax.scatter(
                x_exp_org[:, ind1],
                x_exp_org[:, ind2],
                s=40, facecolors='white', alpha=0.5, edgecolor='green'
            )
            ax.set_xlabel(str(x_columns[ind1]), fontsize=24, fontproperties='Arial', labelpad=10)
            ax.set_ylabel(str(x_columns[ind2]), fontsize=24, fontproperties='Arial', labelpad=10)

            # Pad both axes by 5 % of the plotted range.
            x1_delta = (np.max(x1) - np.min(x1)) * 0.05
            x2_delta = (np.max(x2) - np.min(x2)) * 0.05
            ax.set_xlim(np.min(x1) - x1_delta, np.max(x1) + x1_delta)
            ax.set_ylim(np.min(x2) - x2_delta, np.max(x2) + x2_delta)
            ax.tick_params(direction='in', length=5, width=1, labelsize=20)
            if ind1 in xticks_by_variable:
                ax.set_xticks(xticks_by_variable[ind1])
            for label in ax.get_xticklabels() + ax.get_yticklabels():
                label.set_fontweight('bold')
                label.set_fontsize(fs*0.8)

        axes[0].set_title('objective fcn max', pad=title_pad, fontsize=fs, fontproperties='Arial')
        axes[1].set_title('objective fcn mean', pad=title_pad, fontsize=fs, fontproperties='Arial')
        axes[2].set_title('objective fcn min', pad=title_pad, fontsize=fs, fontproperties='Arial')
        plt.subplots_adjust(wspace=0.3)

        save_name = f"热图_{ind1+1}_{ind2+1}.png"
        plt.savefig(save_name, dpi=600, bbox_inches='tight',facecolor='white', edgecolor='white')
        plt.show()
        # Release the figure so the ten iterations do not accumulate memory.
        plt.close(fig)