In [1]:
import os

import GPyOpt
import numpy as np
import pandas as pd
from GPyOpt.methods import BayesianOptimization
from sklearn.ensemble import RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel, RBF

In [None]:
# Read the table of compositions and their target values.
df = pd.read_csv('AuIrOsPdPtReRhRu_0.60_compositions_and_targets.csv')

# Features: one column per element, in the order listed in X_columns.
# Target: the 'current_over_capacitance' column.
X_columns = ['Pt', 'Pd', 'Au', 'Ru', 'Rh', 'Ir', 'Re', 'Os']
x = df.loc[:, X_columns].to_numpy()
y = df.loc[:, 'current_over_capacitance'].to_numpy()

# Gaussian-process regressor (RBF + white-noise kernel) fitted on the whole table.
# NOTE: despite its name, `rf` is a GaussianProcessRegressor; the name is kept
# because later cells look it up as a module-level global.
kernel = RBF() + WhiteKernel()
rf = GaussianProcessRegressor(
    kernel=kernel,
    random_state=0,
    normalize_y=True,
).fit(x, y)

In [3]:
class bo_batch:
    """Batch Bayesian optimisation over compositions whose fractions sum to 1.

    Only the first ``k`` fractions of each composition are free variables; the
    last fraction is always derived as ``1 - sum(free fractions)``.  Known
    compositions are scored with the module-level model ``rf``; within each
    generation a random forest is fitted to everything known so far and used
    as the objective that GPyOpt evaluates while proposing new compositions.

    Attributes set by ``__init__``:
        initial_raw:   the free fractions supplied by the caller.
        random_state:  seed forwarded to the random forest (None = unseeded).
        compositions:  (n, k + 1) array of full compositions known so far.
        activity:      (n, 1) array of ``rf`` predictions for ``compositions``.
        domain:        GPyOpt domain, one discrete variable per free fraction.
        constraints:   GPyOpt constraint keeping the free fractions' sum <= 1.
    """

    def __init__(self, initial_raw, random_state=None):
        """Store the initial designs, score them and build the search space.

        Args:
            initial_raw: 2-D array-like, one row per starting design and one
                column per free fraction.
            random_state: optional seed for the random forest built in
                ``make_rf``.  It does not seed GPyOpt itself.
        """
        self.initial_raw = initial_raw
        self.random_state = random_state
        self.compositions = self.create_composition(self.initial_raw)
        self.activity = self.f_activity(self.compositions)

        # Search space: every free fraction lives on a 0.01 grid in [0, 1].
        # The number of variables follows the width of `initial_raw`, so the
        # domain and the constraint can never disagree with the data.
        n_free = np.shape(self.initial_raw)[1]
        domain_value = np.linspace(0, 1, num=101)
        self.domain = [{'name': 'var_' + str(j + 1), 'type': 'discrete', 'domain': domain_value}
                       for j in range(n_free)]

        # GPyOpt treats a constraint expression as feasible when it is <= 0,
        # i.e. the free fractions may not sum to more than 1.
        fraction_sum = '+'.join('x[:,' + str(j) + ']' for j in range(n_free))
        self.constraints = [{'name': 'const_1', 'constraint': fraction_sum + '-1'}]

    def create_composition(self, raw):
        """Append the derived last fraction to each row of free fractions.

        Args:
            raw: 2-D array of shape (n, k) holding the free fractions.

        Returns:
            Array of shape (n, k + 1); the extra column is
            ``round(1 - sum(row), 2)`` for each row.
        """
        raw = np.asarray(raw, dtype=float)
        # Rounding to two decimals keeps the derived fraction on the same
        # 0.01 grid as the free ones despite floating-point noise.
        last_element = np.array([round(1 - sum(row), 2) for row in raw],
                                dtype=float).reshape(-1, 1)
        return np.hstack((raw, last_element))

    def f_activity(self, composition):
        """Score full compositions with the module-level model ``rf``.

        Args:
            composition: 2-D array of shape (n, k + 1).

        Returns:
            Column vector of shape (n, 1) with one prediction per row.  An
            empty input yields an empty (0, 1) array without calling ``rf``.
        """
        composition = np.asarray(composition, dtype=float)
        if len(composition) == 0:
            return np.empty((0, 1))
        # One batched call replaces the per-row float(predict(...)) loop,
        # which relied on converting a 1-element array to a Python scalar.
        predictions = rf.predict(composition)
        return np.asarray(predictions, dtype=float).reshape(-1, 1)

    def make_rf(self, composition, activity):
        """Fit ``self.ran``, a random forest mapping composition to activity.

        Args:
            composition: 2-D array of shape (n, k + 1).
            activity: array of n target values; a (n, 1) column is accepted.
        """
        self.ran = RandomForestRegressor(n_estimators=1024,
                                         bootstrap=True,
                                         max_depth=None,
                                         # 'auto' meant "all features" for the
                                         # regressor and is no longer accepted
                                         # by current scikit-learn.
                                         max_features=1.0,
                                         min_samples_leaf=1,
                                         min_samples_split=2,
                                         oob_score=True,
                                         random_state=getattr(self, 'random_state', None))
        # A 1-D target avoids scikit-learn's column-vector conversion warning.
        self.ran.fit(composition, np.ravel(activity))
        return

    def f_BO(self, x):
        """Objective handed to GPyOpt: random-forest prediction for ``x``.

        Args:
            x: array of shape (n, k) (or a single row of length k) holding
                free fractions.

        Returns:
            1-D array with one prediction per row of ``x``.
        """
        x = np.atleast_2d(x)
        # Derive the last fraction by subtracting the columns one at a time,
        # left to right, from 1.
        remainder = 1 - x[:, 0]
        for column in x.T[1:]:
            remainder = remainder - column
        full_composition = np.column_stack((x, remainder))
        return self.ran.predict(full_composition)

    def next_samples_BO(self):
        """Run one GPyOpt batch and return the newly proposed compositions.

        The optimiser is seeded with every known composition/activity pair and
        asked for at most ``len(self.initial_raw)`` acquisitions.  The result
        is also stored in ``self.next_compositions``.

        NOTE(review): no ``maximize`` flag is passed, so GPyOpt's default
        direction applies (presumably minimisation) - confirm this matches the
        sign convention of the activity values.

        Returns:
            Array of shape (m, k + 1) with the new compositions, where m is
            the number of points GPyOpt actually evaluated in this call.
        """
        iteration_number = len(self.initial_raw)
        n_known = len(self.compositions)
        BYS = GPyOpt.methods.BayesianOptimization(self.f_BO,
                                                  domain=self.domain,
                                                  initial_design_numdata=1,
                                                  X=self.compositions[:, :-1],
                                                  Y=self.activity,
                                                  constraints=self.constraints,
                                                  de_duplication=True)
        BYS.run_optimization(max_iter=iteration_number)

        # Keep only the rows appended after the points we supplied.  Slicing a
        # fixed number of rows from the end would re-add already-known
        # compositions whenever the optimiser stops before `max_iter`.
        evaluated = BYS.get_evaluations()[0]
        self.next_compositions = self.create_composition(evaluated[n_known:])
        return self.next_compositions

    def create_next_generation(self, generation):
        """Grow the data set by ``generation`` rounds of optimisation.

        Each round refits the random forest on everything known so far, asks
        GPyOpt for new compositions, scores them with ``rf`` and appends both
        to ``self.compositions`` / ``self.activity``.

        Args:
            generation: number of rounds to run.
        """
        for _ in range(generation):
            self.make_rf(self.compositions, self.activity)
            new_compositions = self.next_samples_BO()
            self.compositions = np.vstack((self.compositions, new_compositions))
            self.activity = np.vstack((self.activity, self.f_activity(new_compositions)))
        return

In [6]:
# Eight starting designs over the seven free fractions: row k (k = 0..6) has
# 0.79 in column k and 0.03 everywhere else; the last row is 0.03 throughout.
initial_raw = np.where(np.eye(8, 7, dtype=bool), 0.79, 0.03)

In [None]:
# Repeat the whole optimisation several times from the same starting designs
# and write one tab-separated result table per repetition.
N_RUNS = 5
N_GENERATIONS = 20
OUTPUT_DIR = '../../raw_data/composition_vs_activity/model2/BO/'

# Create the target directory up front so a missing path fails (or is fixed)
# before the expensive optimisation runs rather than after it.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for i in range(N_RUNS):
    sample = bo_batch(initial_raw)
    sample.create_next_generation(generation = N_GENERATIONS)

    # Column labels reuse X_columns so the exported order always matches the
    # feature order the reference model was trained on.
    result = pd.DataFrame(sample.compositions, columns = list(X_columns))
    result.insert(0, 'Iteration', list(range(1, (len(sample.compositions)+1))))
    # sample.activity is an (n, 1) column; flatten it to a plain 1-D column.
    result = result.assign(Activity = sample.activity.ravel())
    result.to_csv(OUTPUT_DIR + 'result_' + str(i) + '.txt', sep='\t', mode='w')