In [1]:
import warnings, sys
import numpy as np
import pandas as pd
import pickle
from pprint import pprint

import plotly.express as px
warnings.filterwarnings("ignore")

In [2]:
sys.path.append('../src')
import BayesOpt as bo
from BayesOpt import GaussianProcessRegressor as GPR

In [3]:
# Load the dataset of designed conditions:
#   dist_matrix  - array handed to the GP regressor as its precomputed kernel_matrix
#   df_reactions - table of candidate reaction conditions, looked up by row label
dist_matrix = np.load('../data/opt_conditions/dis_matrix.npy')
df_reactions = pd.read_pickle('../data/opt_conditions/df_reaction_conditions.pkl')

## Baseline set chosen by random selection

In [5]:
# Random-selection baseline: draw n_samples conditions per step, for n_steps steps.
n_samples = 4
n_steps = 11
random_seed = 2021
# Seeded generator so the baseline is reproducible. The call sequence (one
# choice() per step) is kept unchanged so the same labels are drawn as before.
random = np.random.RandomState(random_seed)
baseline_columns = ['step', 'idx', 'name', 'ligand', 'molecule_id', 'Ni']
# Collect the rows keyed by condition label and build the frame once, instead of
# enlarging it row by row with .loc (quadratic, and it tends to leave every
# column as object dtype).
# NOTE: sampling is without replacement only *within* a step. If a label is
# drawn again in a later step, its row is overwritten in place (exactly what the
# previous .loc assignment did), so the table can end up with fewer than
# n_samples * n_steps rows.
baseline_rows = {}
for step in range(1, n_steps + 1):
    for idx in random.choice(df_reactions.index, size=n_samples, replace=False):
        baseline_rows[idx] = [step] + df_reactions.loc[idx, :].tolist()
df_random = pd.DataFrame.from_dict(baseline_rows, orient='index', columns=baseline_columns)

In [72]:
# Write the random baseline selection to an Excel workbook.
df_random.to_excel('../data/opt_conditions/random_suggestion.xlsx')

# Step 0

In [5]:
# Load the initial experimental results (sheet 'step_0').
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name='step_0',
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)

In [28]:
# Gaussian-process settings for the initial model.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 1e3),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 1e4),
    noise=0.2,
)
# Optimiser settings: candidates come from a fixed sampling set, scored with UCB.
bo_kwargs = dict(
    bounds=np.array([0, 1]),
    optimizer='sampling',
    acq_func='UCB',
)
# Build the optimiser around a GP that uses dist_matrix as its kernel matrix;
# test_x is the set of candidate labels it samples from.
opt = bo.BayesOptimizer(
    base_estimator=GPR(kernel_matrix=dist_matrix, **gpr_kwargs),
    sampling=test_x,
    **bo_kwargs,
)

In [27]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded (same arguments give different values in
# other steps), so re-running presumably changes the kappas - confirm in
# BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
print(parallel_param)

{'kappa': [0.22408678026896017, 0.4892121500301603, 0.8163480513500929, 1.6466458209495274, 2.7119390757823454, 4.298965942098589, 5.4441425670408385, 6.221457727021481]}


In [None]:
# First Bayesian-optimisation round: register the initial results, then request
# suggestions in parallel, driven by the kappa values in parallel_param.
opt.tell(train_x, train_y)
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)

In [31]:
# Get the details of the suggested samples.
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_ = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
372,"[2, 1, 2]",L3_Ra05_Rb016_3,3,127,3,0.224087
2173,"[12, 1, 3]",L13_Ra05_Rb016_4,13,127,4,0.489212
612,"[3, 7, 2]",L4_Ra09_Rb016_3,4,239,3,0.816348
4213,"[23, 7, 3]",L26_Ra09_Rb016_4,26,239,4,1.646646
4235,"[23, 9, 5]",L26_Ra09_Rb020_6,26,243,6,2.711939
1895,"[10, 9, 5]",L11_Ra09_Rb020_6,11,243,6,4.298966
4196,"[23, 5, 6]",L26_Ra07_Rb020_7,26,187,7,5.444143
1856,"[10, 5, 6]",L11_Ra07_Rb020_7,11,187,7,6.221458


In [53]:
# Save data and instance
# NOTE(review): this writes 'suggestion.xlsx', whereas the final save cell appends to
# 'condition_suggestion.xlsx' - confirm which workbook is intended.
df_suggestion.to_excel('../data/opt_conditions/suggestion.xlsx', sheet_name='step_1')
# Remove the kernel matrix to save space
# (this clears it on the live `opt` object too, not only in the pickled file)
opt.base_estimator.k_matrix = None
for estimator in opt.gpr:
    estimator.k_matrix = None
with open('../data/opt_conditions/condition_opt.pkl', 'wb') as file:
    pickle.dump(opt, file)

# Step 1

In [17]:
# Load the measured results for step 1.
n_step = 1
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
372,"[2, 1, 2]",L3_Ra05_Rb016_3,3,127,3,0.224087,0.72,0.72,0.72
2173,"[12, 1, 3]",L13_Ra05_Rb016_4,13,127,4,0.489212,0.0,0.0,0.0
612,"[3, 7, 2]",L4_Ra09_Rb016_3,4,239,3,0.816348,0.62,0.67,0.645
4213,"[23, 7, 3]",L26_Ra09_Rb016_4,26,239,4,1.646646,0.0,0.0,0.0
4235,"[23, 9, 5]",L26_Ra09_Rb020_6,26,243,6,2.711939,0.0,0.0,0.0
1895,"[10, 9, 5]",L11_Ra09_Rb020_6,11,243,6,4.298966,0.0,0.0,0.0
4196,"[23, 5, 6]",L26_Ra07_Rb020_7,26,187,7,5.444143,0.0,0.0,0.0
1856,"[10, 5, 6]",L11_Ra07_Rb020_7,11,187,7,6.221458,0.0,0.0,0.0


In [167]:
# Gaussian-process settings used for step 1.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 1e3),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 1e4),
    noise=0.16,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 1 results and fit the GPs.
opt.tell(train_x, train_y)

In [None]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator. The next cell pins the values as literals.
parallel_param = bo.kwargs_generator(mean=2, size=8)
pprint(parallel_param)

In [164]:
# Kappa values for step 1, pinned as literals; this overrides whatever the
# generator cell produced (presumably so the step can be repeated with the same values).
parallel_param = {'kappa': [0.010837062195875595,
           0.15536798499266918,
           0.22136995004017182,
           0.8124133955563699,
           1.191112020095647,
           1.2494428338584582,
           2.9263785567848566,
           3.61480301852049]}

In [166]:
# Run the Bayesian optimisation: request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Get the details of the suggested samples.
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_ = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
422,"[2, 6, 2]",L3_Ra09_Rb011_3,3,234,3,0.010837
522,"[2, 16, 2]",L3_Ra18_Rb016_3,3,491,3,0.155368
472,"[2, 11, 2]",L3_Ra12_Rb016_3,3,323,3,0.22137
392,"[2, 3, 2]",L3_Ra05_Rb018_3,3,129,3,0.812413
421,"[2, 6, 1]",L3_Ra09_Rb011_2,3,234,2,1.191112
452,"[2, 9, 2]",L3_Ra09_Rb020_3,3,243,3,1.249443
383,"[2, 2, 3]",L3_Ra05_Rb017_4,3,128,4,2.926379
451,"[2, 9, 1]",L3_Ra09_Rb020_2,3,243,2,3.614803


# Step 2

In [16]:
# Load the measured results for step 2.
n_step = 2
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
432,"[2, 7, 2]",L3_Ra09_Rb016_3,3,239,3,0.010837,0.68,0.66,0.67
382,"[2, 2, 2]",L3_Ra05_Rb017_3,3,128,3,0.155368,0.7,0.67,0.685
381,"[2, 2, 1]",L3_Ra05_Rb017_2,3,128,2,0.22137,0.73,0.74,0.735
561,"[3, 2, 1]",L4_Ra05_Rb017_2,4,128,2,0.812413,0.71,0.71,0.71
540,"[3, 0, 0]",L4_Ra05_Rb011_1,4,122,1,1.191112,0.56,0.56,0.56
550,"[3, 1, 0]",L4_Ra05_Rb016_1,4,127,1,1.249443,0.54,0.59,0.565
3680,"[20, 8, 0]",L23_Ra09_Rb017_1,23,240,1,2.926379,0.67,0.65,0.66
3681,"[20, 8, 1]",L23_Ra09_Rb017_2,23,240,2,3.614803,0.56,0.59,0.575


In [227]:
# Gaussian-process settings used for step 2.
gpr_kwargs = {
    'kernel': 'precomputed',
    'constant': 1,
    'constant_bounds': (1e-3, 1e3),
    'length_scale': [1, 1, 1, 1],
    'length_scale_bounds': (1e-4, 1e4),
    'noise': 0.12
}
# Load the previous optimisation object.
# Fixed path: this cell used './data/...' while every other cell reads and
# writes '../data/...', so it failed (or read a different file) when run from
# the notebook's directory.
# Only load pickles produced by this project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(kernel_matrix=dist_matrix, **gpr_kwargs)
# Tell the step 2 results and fit the GPs.
opt.tell(train_x, train_y)

In [226]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
pprint(parallel_param)

{'kappa': [0.20533997913602084,
           0.7009305660362705,
           1.0025867195003118,
           2.2807151602479343,
           2.7251485839659315,
           3.4530456642173792,
           6.533641600494207,
           7.003329183469068]}


In [231]:
# Run the Bayesian optimisation: request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Get the details of the suggested samples.
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_ = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
421,"[2, 6, 1]",L3_Ra09_Rb011_2,3,234,2,0.20534
621,"[3, 8, 1]",L4_Ra09_Rb017_2,4,240,2,0.700931
422,"[2, 6, 2]",L3_Ra09_Rb011_3,3,234,3,1.002587
801,"[4, 8, 1]",L5_Ra09_Rb017_2,5,240,2,2.280715
201,"[1, 2, 1]",L2_Ra05_Rb017_2,2,128,2,2.725149
741,"[4, 2, 1]",L5_Ra05_Rb017_2,5,128,2,3.453046
3871,"[21, 9, 1]",L24_Ra09_Rb020_2,24,243,2,6.533642
2971,"[16, 9, 1]",L18_Ra09_Rb020_2,18,243,2,7.003329


# Step 3

In [15]:
# Load the measured results for step 3.
n_step = 3
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
421,"[2, 6, 1]",L3_Ra09_Rb011_2,3,234,2,0.20534,0.53,0.52,0.525
621,"[3, 8, 1]",L4_Ra09_Rb017_2,4,240,2,0.700931,0.68,0.66,0.67
422,"[2, 6, 2]",L3_Ra09_Rb011_3,3,234,3,1.002587,0.56,0.51,0.535
801,"[4, 8, 1]",L5_Ra09_Rb017_2,5,240,2,2.280715,0.37,0.61,0.49
201,"[1, 2, 1]",L2_Ra05_Rb017_2,2,128,2,2.725149,0.77,0.76,0.765
741,"[4, 2, 1]",L5_Ra05_Rb017_2,5,128,2,3.453046,0.25,0.22,0.235
3871,"[21, 9, 1]",L24_Ra09_Rb020_2,24,243,2,6.533642,0.45,0.45,0.45
2971,"[16, 9, 1]",L18_Ra09_Rb020_2,18,243,2,7.003329,0.0,0.0,0.0


In [10]:
# Gaussian-process settings used for step 3.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 1e3),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 1e4),
    noise=0.15,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 3 results and fit the GPs.
opt.tell(train_x, train_y)

In [139]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=2, size=8)
pprint(parallel_param)

{'kappa': [0.06381380670603536,
           0.4256794096421874,
           0.9921894306547225,
           1.7849034151181256,
           1.8964528913767067,
           2.6352727444529918,
           5.724473344015227,
           8.92841222079481]}


In [141]:
# Run the Bayesian optimisation: request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_ = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
610,"[3, 7, 0]",L4_Ra09_Rb016_1,4,239,1,0.063814
250,"[1, 7, 0]",L2_Ra09_Rb016_1,2,239,1,0.425679
240,"[1, 6, 0]",L2_Ra09_Rb011_1,2,234,1,0.992189
529,"[2, 16, 9]",L3_Ra18_Rb016_10,3,491,10,1.784903
539,"[2, 17, 9]",L3_Ra20_Rb016_10,3,519,10,1.896453
349,"[1, 16, 9]",L2_Ra18_Rb016_10,2,491,10,2.635273
3909,"[21, 12, 9]",L24_Ra14_Rb016_10,24,379,10,5.724473
3009,"[16, 12, 9]",L18_Ra14_Rb016_10,18,379,10,8.928412


# Step 4

In [14]:
# Load the measured results for step 4.
n_step = 4
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
610,"[3, 7, 0]",L4_Ra09_Rb016_1,4,239,1,0.063814,0.76,0.73,0.745
250,"[1, 7, 0]",L2_Ra09_Rb016_1,2,239,1,0.425679,0.77,0.77,0.77
240,"[1, 6, 0]",L2_Ra09_Rb011_1,2,234,1,0.992189,0.73,0.73,0.73
529,"[2, 16, 9]",L3_Ra18_Rb016_10,3,491,10,1.784903,0.61,0.61,0.61
539,"[2, 17, 9]",L3_Ra20_Rb016_10,3,519,10,1.896453,0.55,0.52,0.535
349,"[1, 16, 9]",L2_Ra18_Rb016_10,2,491,10,2.635273,0.67,0.62,0.645
3909,"[21, 12, 9]",L24_Ra14_Rb016_10,24,379,10,5.724473,0.08,0.12,0.1
3009,"[16, 12, 9]",L18_Ra14_Rb016_10,18,379,10,8.928412,0.0,0.0,0.0


In [None]:
# Gaussian-process settings used for step 4.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 1e3),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 1e4),
    noise=0.16,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 4 results and fit the GPs.
opt.tell(train_x, train_y)

In [35]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
pprint(parallel_param)

{'kappa': [0.2631649254565268,
           0.3584006228000721,
           2.2014362307645547,
           2.292344721835996,
           3.6518833766369387,
           4.559933756267952,
           7.532726501262211,
           8.831959371982384]}


In [37]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
251,"[1, 7, 1]",L2_Ra09_Rb016_2,2,239,2,0.263165
200,"[1, 2, 0]",L2_Ra05_Rb017_1,2,128,1,0.358401
1340,"[7, 8, 0]",L8_Ra09_Rb017_1,8,240,1,2.201436
3320,"[18, 8, 0]",L21_Ra09_Rb017_1,21,240,1,2.292345
1520,"[8, 8, 0]",L9_Ra09_Rb017_1,9,240,1,3.651883
4040,"[22, 8, 0]",L25_Ra09_Rb017_1,25,240,1,4.559934
687,"[3, 14, 7]",L4_Ra17_Rb016_8,4,463,8,7.532727
697,"[3, 15, 7]",L4_Ra17_Rb017_8,4,464,8,8.831959


# Step 5

In [18]:
# Load the measured results for step 5.
n_step = 5
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
251,"[1, 7, 1]",L2_Ra09_Rb016_2,2,239,2,0.263165,0.83,0.75,0.79
200,"[1, 2, 0]",L2_Ra05_Rb017_1,2,128,1,0.358401,0.65,0.74,0.695
1340,"[7, 8, 0]",L8_Ra09_Rb017_1,8,240,1,2.201436,0.67,0.66,0.665
3320,"[18, 8, 0]",L21_Ra09_Rb017_1,21,240,1,2.292345,0.66,0.63,0.645
1520,"[8, 8, 0]",L9_Ra09_Rb017_1,9,240,1,3.651883,0.0,0.0,0.0
4040,"[22, 8, 0]",L25_Ra09_Rb017_1,25,240,1,4.559934,0.0,0.0,0.0
687,"[3, 14, 7]",L4_Ra17_Rb016_8,4,463,8,7.532727,0.67,0.67,0.67
697,"[3, 15, 7]",L4_Ra17_Rb017_8,4,464,8,8.831959,0.59,0.52,0.555


In [None]:
# Gaussian-process settings used for step 5.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 1e3),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 1e4),
    noise=0.06,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 5 results and fit the GPs.
opt.tell(train_x, train_y)

In [87]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
pprint(parallel_param)

{'kappa': [0.15501445325028057,
           0.2656646947591378,
           0.29187605389876536,
           1.0297172530473635,
           1.4619565896260411,
           2.257929900027268,
           2.518322110353824,
           6.243362200007914]}


In [143]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.


Predicted variances smaller than 0. Setting those variances to 0.



Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
262,"[1, 8, 2]",L2_Ra09_Rb017_3,2,240,3,0.155014
202,"[1, 2, 2]",L2_Ra05_Rb017_3,2,128,3,0.265665
191,"[1, 1, 1]",L2_Ra05_Rb016_2,2,127,2,0.291876
709,"[3, 16, 9]",L4_Ra18_Rb016_10,4,491,10,1.029717
708,"[3, 16, 8]",L4_Ra18_Rb016_9,4,491,9,1.461957
297,"[1, 11, 7]",L2_Ra12_Rb016_8,2,323,8,2.25793
296,"[1, 11, 6]",L2_Ra12_Rb016_7,2,323,7,2.518322
1427,"[7, 16, 7]",L8_Ra18_Rb016_8,8,491,8,6.243362


# Step 6

In [19]:
# Load the measured results for step 6.
n_step = 6
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
262,"[1, 8, 2]",L2_Ra09_Rb017_3,2,240,3,0.155014,0.69,0.68,0.685
202,"[1, 2, 2]",L2_Ra05_Rb017_3,2,128,3,0.265665,0.76,0.78,0.77
191,"[1, 1, 1]",L2_Ra05_Rb016_2,2,127,2,0.291876,0.87,0.88,0.875
709,"[3, 16, 9]",L4_Ra18_Rb016_10,4,491,10,1.029717,0.62,0.63,0.625
708,"[3, 16, 8]",L4_Ra18_Rb016_9,4,491,9,1.461957,0.63,0.62,0.625
297,"[1, 11, 7]",L2_Ra12_Rb016_8,2,323,8,2.25793,0.84,0.84,0.84
296,"[1, 11, 6]",L2_Ra12_Rb016_7,2,323,7,2.518322,0.82,0.8,0.81
1427,"[7, 16, 7]",L8_Ra18_Rb016_8,8,491,8,6.243362,0.58,0.58,0.58


In [None]:
# Gaussian-process settings used for step 6: the initial length scales are drawn
# uniformly within their bounds from a seeded generator instead of being fixed.
seed = np.random.RandomState(2021)
bounds = np.array([(1e-4, 20) for _ in range(4)])
length = seed.uniform(low=bounds[:, 0], high=bounds[:, 1])
gpr_kwargs = dict(
    kernel='precomputed',
    constant=10,
    constant_bounds=(1e-3, 20),
    length_scale=length,
    length_scale_bounds=(1e-4, 20),
    noise=0.05,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 6 results and fit the GPs.
opt.tell(train_x, train_y)

In [207]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
pprint(parallel_param)

{'kappa': [0.11096284474854443,
           0.22623298164276803,
           1.1168441431893132,
           2.330633130824004,
           2.7658384202366046,
           3.9192803074422855,
           4.546544596682007,
           7.030079011800522]}


In [286]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
249,"[1, 6, 9]",L2_Ra09_Rb011_10,2,234,10,0.110963
269,"[1, 8, 9]",L2_Ra09_Rb017_10,2,240,10,0.226233
189,"[1, 0, 9]",L2_Ra05_Rb011_10,2,122,10,1.116844
188,"[1, 0, 8]",L2_Ra05_Rb011_9,2,122,9,2.330633
187,"[1, 0, 7]",L2_Ra05_Rb011_8,2,122,8,2.765838
218,"[1, 3, 8]",L2_Ra05_Rb018_9,2,129,9,3.91928
3248,"[18, 0, 8]",L21_Ra05_Rb011_9,21,122,9,4.546545
3428,"[19, 0, 8]",L22_Ra05_Rb011_9,22,122,9,7.030079


# Step 7

In [20]:
# Load the measured results for step 7.
n_step = 7
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
249,"[1, 6, 9]",L2_Ra09_Rb011_10,2,234,10,0.110963,0.62,0.58,0.6
269,"[1, 8, 9]",L2_Ra09_Rb017_10,2,240,10,0.226233,0.7,0.52,0.61
189,"[1, 0, 9]",L2_Ra05_Rb011_10,2,122,10,1.116844,0.65,0.65,0.65
188,"[1, 0, 8]",L2_Ra05_Rb011_9,2,122,9,2.330633,0.74,0.71,0.725
187,"[1, 0, 7]",L2_Ra05_Rb011_8,2,122,8,2.765838,0.68,0.73,0.705
218,"[1, 3, 8]",L2_Ra05_Rb018_9,2,129,9,3.91928,0.67,0.62,0.645
3248,"[18, 0, 8]",L21_Ra05_Rb011_9,21,122,9,4.546545,0.52,0.55,0.535
3428,"[19, 0, 8]",L22_Ra05_Rb011_9,22,122,9,7.030079,0.19,0.17,0.18


In [None]:
# Gaussian-process settings used for step 7.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1,
    constant_bounds=(1e-3, 20),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 20),
    noise=0.06,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 7 results and fit the GPs.
opt.tell(train_x, train_y)

In [135]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=3, size=8)
pprint(parallel_param)

{'kappa': [0.2788987762709123,
           0.5998674343441854,
           0.625516274664051,
           1.2971855387094955,
           2.656955826350025,
           2.679134249306771,
           4.052076499920827,
           9.628884457969205]}


In [242]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
197,"[1, 1, 7]",L2_Ra05_Rb016_8,2,127,8,0.278899
196,"[1, 1, 6]",L2_Ra05_Rb016_7,2,127,7,0.599867
298,"[1, 11, 8]",L2_Ra12_Rb016_9,2,323,9,0.625516
347,"[1, 16, 7]",L2_Ra18_Rb016_8,2,491,8,1.297186
328,"[1, 14, 8]",L2_Ra17_Rb016_9,2,463,9,2.656956
1378,"[7, 11, 8]",L8_Ra12_Rb016_9,8,323,9,2.679134
3777,"[20, 17, 7]",L23_Ra20_Rb016_8,23,519,8,4.052076
3737,"[20, 13, 7]",L23_Ra16_Rb020_8,23,439,8,9.628884


# Step 8

In [22]:
# Load the measured results for step 8.
n_step = 8
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
197,"[1, 1, 7]",L2_Ra05_Rb016_8,2,127,8,0.278899,0.7,0.78,0.74
196,"[1, 1, 6]",L2_Ra05_Rb016_7,2,127,7,0.599867,0.85,0.79,0.82
298,"[1, 11, 8]",L2_Ra12_Rb016_9,2,323,9,0.625516,0.76,0.76,0.76
347,"[1, 16, 7]",L2_Ra18_Rb016_8,2,491,8,1.297186,0.65,0.65,0.65
328,"[1, 14, 8]",L2_Ra17_Rb016_9,2,463,9,2.656956,0.68,0.72,0.7
1378,"[7, 11, 8]",L8_Ra12_Rb016_9,8,323,9,2.679134,0.66,0.67,0.665
3777,"[20, 17, 7]",L23_Ra20_Rb016_8,23,519,8,4.052076,0.25,0.37,0.31
3737,"[20, 13, 7]",L23_Ra16_Rb020_8,23,439,8,9.628884,0.0,0.0,0.0


In [None]:
# Gaussian-process settings used for step 8.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=1.2,
    constant_bounds=(1e-3, 20),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 20),
    noise=0.05,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 8 results and fit the GPs.
opt.tell(train_x, train_y)

In [237]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# Increasing exploration: mean is raised to 5 (earlier steps used 2 or 3).
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=5, size=8)
pprint(parallel_param)

{'kappa': [1.1764885233970097,
           1.8265591967530508,
           2.3164412380746358,
           3.027186191284607,
           6.78706817942249,
           7.776555996479301,
           17.786162696828317,
           18.947567676759192]}


In [285]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
199,"[1, 1, 9]",L2_Ra05_Rb016_10,2,127,10,1.176489
299,"[1, 11, 9]",L2_Ra12_Rb016_10,2,323,10,1.826559
195,"[1, 1, 5]",L2_Ra05_Rb016_6,2,127,6,2.316441
252,"[1, 7, 2]",L2_Ra09_Rb016_3,2,239,3,3.027186
1271,"[7, 1, 1]",L8_Ra05_Rb016_2,8,127,2,6.787068
1262,"[7, 0, 2]",L8_Ra05_Rb011_3,8,122,3,7.776556
2628,"[14, 10, 8]",L15_Ra11_Rb016_9,15,295,9,17.786163
1548,"[8, 10, 8]",L9_Ra11_Rb016_9,9,295,9,18.947568


# Step 9

In [23]:
# Load the measured results for step 9.
n_step = 9
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
199,"[1, 1, 9]",L2_Ra05_Rb016_10,2,127,10,1.176489,0.8,0.8,0.8
299,"[1, 11, 9]",L2_Ra12_Rb016_10,2,323,10,1.826559,0.82,0.79,0.805
195,"[1, 1, 5]",L2_Ra05_Rb016_6,2,127,6,2.316441,0.84,0.85,0.845
252,"[1, 7, 2]",L2_Ra09_Rb016_3,2,239,3,3.027186,0.77,0.8,0.785
1271,"[7, 1, 1]",L8_Ra05_Rb016_2,8,127,2,6.787068,0.78,0.79,0.785
1262,"[7, 0, 2]",L8_Ra05_Rb011_3,8,122,3,7.776556,0.68,0.71,0.695
2628,"[14, 10, 8]",L15_Ra11_Rb016_9,15,295,9,17.786163,0.0,0.0,0.0
1548,"[8, 10, 8]",L9_Ra11_Rb016_9,9,295,9,18.947568,0.0,0.0,0.0


In [None]:
# Gaussian-process settings used for step 9.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=2.5,
    constant_bounds=(1e-3, 20),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 20),
    noise=0.05,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 9 results and fit the GPs.
opt.tell(train_x, train_y)

In [26]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# Increasing exploration: mean is raised to 5 (earlier steps used 2 or 3).
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=5, size=8)
pprint(parallel_param)

{'kappa': [0.42633327740579147,
           1.892327078649767,
           2.521906049793312,
           3.3295405559370055,
           3.5410028347036087,
           5.339547763688298,
           7.939640255461042,
           9.261748957429736]}


In [27]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
192,"[1, 1, 2]",L2_Ra05_Rb016_3,2,127,3,0.426333
181,"[1, 0, 1]",L2_Ra05_Rb011_2,2,122,2,1.892327
658,"[3, 11, 8]",L4_Ra12_Rb016_9,4,323,9,2.521906
659,"[3, 11, 9]",L4_Ra12_Rb016_10,4,323,10,3.329541
3251,"[18, 1, 1]",L21_Ra05_Rb016_2,21,127,2,3.541003
1286,"[7, 2, 6]",L8_Ra05_Rb017_7,8,128,7,5.339548
3352,"[18, 11, 2]",L21_Ra12_Rb016_3,21,323,3,7.93964
3282,"[18, 4, 2]",L21_Ra05_Rb020_3,21,131,3,9.261749


# Step 10

In [24]:
# Load the measured results for step 10.
n_step = 10
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
192,"[1, 1, 2]",L2_Ra05_Rb016_3,2,127,3,0.426333,0.84,0.9,0.87
181,"[1, 0, 1]",L2_Ra05_Rb011_2,2,122,2,1.892327,0.75,0.8,0.775
658,"[3, 11, 8]",L4_Ra12_Rb016_9,4,323,9,2.521906,0.79,0.76,0.775
659,"[3, 11, 9]",L4_Ra12_Rb016_10,4,323,10,3.329541,0.75,0.75,0.75
3251,"[18, 1, 1]",L21_Ra05_Rb016_2,21,127,2,3.541003,0.48,0.24,0.36
1286,"[7, 2, 6]",L8_Ra05_Rb017_7,8,128,7,5.339548,0.57,0.56,0.565
3352,"[18, 11, 2]",L21_Ra12_Rb016_3,21,323,3,7.93964,0.57,0.68,0.625
3282,"[18, 4, 2]",L21_Ra05_Rb020_3,21,131,3,9.261749,0.34,0.18,0.26


In [None]:
# Gaussian-process settings used for step 10.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=0.2,
    constant_bounds=(1e-3, 20),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 20),
    noise=0.05,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 10 results and fit the GPs.
opt.tell(train_x, train_y)

In [69]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# Increasing exploration: mean is raised to 5 (earlier steps used 2 or 3).
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=5, size=8)
pprint(parallel_param)

{'kappa': [0.7164614036069894,
           1.2434038903279827,
           1.7656962298882493,
           3.672435447258228,
           4.630839461628796,
           8.872462641187354,
           11.49280006575042,
           16.498663800325218]}


In [76]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
194,"[1, 1, 4]",L2_Ra05_Rb016_5,2,127,5,0.716461
193,"[1, 1, 3]",L2_Ra05_Rb016_4,2,127,4,1.243404
292,"[1, 11, 2]",L2_Ra12_Rb016_3,2,323,3,1.765696
291,"[1, 11, 1]",L2_Ra12_Rb016_2,2,323,2,3.672435
290,"[1, 11, 0]",L2_Ra12_Rb016_1,2,323,1,4.630839
111,"[0, 11, 1]",L1_Ra12_Rb016_2,1,323,2,8.872463
112,"[0, 11, 2]",L1_Ra12_Rb016_3,1,323,3,11.4928
1010,"[5, 11, 0]",L6_Ra12_Rb016_1,6,323,1,16.498664


# Step 11

In [25]:
# Load the measured results for step 11.
n_step = 11
df_exp = pd.read_excel(
    '../data/opt_conditions/exp_results.xlsx',
    sheet_name=n_step,  # an int picks the sheet by zero-based position, not by name
    index_col=0,
)
# Column vectors for the optimiser: row labels as inputs, 'yield' as targets.
train_x = df_exp.index.values.reshape(-1, 1)
train_y = df_exp['yield'].values.reshape(-1, 1)
# Candidate labels 0..4499 as a column vector.
test_x = np.arange(4500).reshape(-1, 1)
df_exp

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa,Yield 1,Yield 2,yield
194,"[1, 1, 4]",L2_Ra05_Rb016_5,2,127,5,0.716461,0.91,0.84,0.875
193,"[1, 1, 3]",L2_Ra05_Rb016_4,2,127,4,1.243404,0.88,0.88,0.88
292,"[1, 11, 2]",L2_Ra12_Rb016_3,2,323,3,1.765696,0.79,0.76,0.775
291,"[1, 11, 1]",L2_Ra12_Rb016_2,2,323,2,3.672435,0.74,0.75,0.745
290,"[1, 11, 0]",L2_Ra12_Rb016_1,2,323,1,4.630839,0.62,0.63,0.625
111,"[0, 11, 1]",L1_Ra12_Rb016_2,1,323,2,8.872463,0.71,0.71,0.71
112,"[0, 11, 2]",L1_Ra12_Rb016_3,1,323,3,11.4928,0.82,0.77,0.795
1010,"[5, 11, 0]",L6_Ra12_Rb016_1,6,323,1,16.498664,0.07,0.21,0.14


In [None]:
# Gaussian-process settings used for step 11.
gpr_kwargs = dict(
    kernel='precomputed',
    constant=0.3,
    constant_bounds=(1e-3, 20),
    length_scale=[1, 1, 1, 1],
    length_scale_bounds=(1e-4, 20),
    noise=0.1,
)
# Restore the previously pickled optimiser. Only load pickles produced by this
# project: unpickling can execute arbitrary code.
with open('../data/opt_conditions/condition_opt.pkl', 'rb') as file:
    opt = pickle.load(file)
# Swap in a fresh estimator built on dist_matrix with the settings above
# (the save cells set k_matrix to None before pickling).
opt.base_estimator = GPR(
    kernel_matrix=dist_matrix,
    **gpr_kwargs,
)
# Tell the step 11 results and fit the GPs.
opt.tell(train_x, train_y)

In [68]:
# Generate the kappa parameters: 8 values, later passed to parallel_ask as acq_func_args.
# Increasing exploration: mean is raised to 5 (earlier steps used 2 or 3).
# NOTE(review): the draw looks unseeded, so re-running presumably changes the
# kappas - confirm in BayesOpt.kwargs_generator.
parallel_param = bo.kwargs_generator(mean=5, size=8)
pprint(parallel_param)

{'kappa': [0.8744743152599881,
           2.3221902026850745,
           3.3760449446005634,
           3.7332649550073445,
           4.110968960094722,
           10.772431400170085,
           11.404995842205498,
           11.680712792854536]}


In [69]:
# Request suggestions driven by the kappa values.
next_x = opt.parallel_ask(acq_func_args=parallel_param, num_samples=1)
# Each suggestion is indexed [0][0][0] to reach the scalar that is used as the
# row label of the suggested condition in df_reactions.
next_id = [suggestion[0][0][0] for suggestion in next_x]
# .loc with a list of labels returns a new frame. Add the kappa column with
# .assign rather than writing into that selection via .loc[:, 'kappa'] = ...,
# which can raise SettingWithCopyWarning (hidden here by the global filter).
df_suggestion = df_reactions.loc[next_id, :].assign(kappa=parallel_param['kappa'])
df_suggestion

Unnamed: 0,idx,name,ligand,molecule_id,Ni,kappa
294,"[1, 11, 4]",L2_Ra12_Rb016_5,2,323,5,0.874474
1092,"[6, 1, 2]",L7_Ra05_Rb016_3,7,127,3,2.32219
1194,"[6, 11, 4]",L7_Ra12_Rb016_5,7,323,5,3.376045
1112,"[6, 3, 2]",L7_Ra05_Rb018_3,7,129,3,3.733265
1111,"[6, 3, 1]",L7_Ra05_Rb018_2,7,129,2,4.110969
49,"[0, 4, 9]",L1_Ra05_Rb020_10,1,131,10,10.772431
3109,"[17, 4, 9]",L20_Ra05_Rb020_10,20,131,10,11.404996
1210,"[6, 13, 0]",L7_Ra16_Rb020_1,7,439,1,11.680713


In [70]:
# Save data and instance
# The sheet is named after the *next* step (n_step + 1), i.e. the step these
# suggestions will be run in.
# NOTE(review): mode='a' appends to an existing workbook, so the file must already
# exist; re-running for the same n_step may clash with the existing sheet - confirm
# against the installed pandas version.
# NOTE(review): the per-step cells reload condition_opt.pkl, so this cell presumably
# has to be re-run after every step - confirm the intended execution order.
with pd.ExcelWriter('../data/opt_conditions/condition_suggestion.xlsx', mode='a') as excel_writer: 
    df_suggestion.to_excel(excel_writer, sheet_name='step_{}'.format(n_step + 1))
# Remove the kernel matrix to save drive space
# (this clears it on the live `opt` object too, not only in the pickled file)
opt.base_estimator.k_matrix = None
for estimator in opt.gpr:
    estimator.k_matrix = None
with open('../data/opt_conditions/condition_opt.pkl', 'wb') as file:
    pickle.dump(opt, file)