<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Optimization" data-toc-modified-id="Optimization-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Optimization</a></span><ul class="toc-item"><li><span><a href="#Setup" data-toc-modified-id="Setup-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Setup</a></span></li><li><span><a href="#Single-Task-Optimization-A" data-toc-modified-id="Single-Task-Optimization-A-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Single Task Optimization A</a></span><ul class="toc-item"><li><span><a href="#Generate-Suggestions" data-toc-modified-id="Generate-Suggestions-1.2.1"><span class="toc-item-num">1.2.1&nbsp;&nbsp;</span>Generate Suggestions</a></span></li></ul></li><li><span><a href="#Multitask-Experiments-B" data-toc-modified-id="Multitask-Experiments-B-1.3"><span class="toc-item-num">1.3&nbsp;&nbsp;</span>Multitask Experiments B</a></span><ul class="toc-item"><li><span><a href="#Generate-Suggestions" data-toc-modified-id="Generate-Suggestions-1.3.1"><span class="toc-item-num">1.3.1&nbsp;&nbsp;</span>Generate Suggestions</a></span></li><li><span><a href="#Test-STBO" data-toc-modified-id="Test-STBO-1.3.2"><span class="toc-item-num">1.3.2&nbsp;&nbsp;</span>Test STBO</a></span></li></ul></li><li><span><a href="#LCM-Kernel" data-toc-modified-id="LCM-Kernel-1.4"><span class="toc-item-num">1.4&nbsp;&nbsp;</span>LCM Kernel</a></span></li></ul></li><li><span><a href="#Weighted-MTBO" data-toc-modified-id="Weighted-MTBO-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Weighted MTBO</a></span></li></ul></div>

# Optimization

From the menu above, select "Runtime" -> "Run All".

If you've already run this notebook recently, click on "Single Task Optimization A", then click "Runtime" -> "Run After".

## Setup

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Path to the local checkout of the multitask package.
# Set the MULTITASK_PATH environment variable to override the default below,
# which is an absolute path on the original author's machine.

import os
import sys

MULTITASK_PATH = os.environ.get(
    "MULTITASK_PATH",
    '/Users/dsw46/Library/CloudStorage/OneDrive-UniversityofCambridge/Cambridge/Projects/MTBO/multitask',
)
# insert at 1, 0 is the script path (or '' in REPL); skip the insert when the
# path is already present so re-running this cell does not keep growing sys.path
if MULTITASK_PATH not in sys.path:
    sys.path.insert(1, MULTITASK_PATH)

In [3]:
from summit import *
from multitask import *
import pandas as pd
import torch
import gpytorch
import hiplot as hip

## Single Task Optimization A

Import data

In [30]:
# Load the experiments-A CSV; skiprows=1 skips the first line of the file.
df = pd.read_csv("../data/experiments/ch_activation_experiments_a.csv", skiprows=1)

In [31]:
# Number of experiments to request from suggest_experiments in the cells below.
num_experiments = 1

### Generate Suggestions

In [32]:
# Transform data
categorical_columns = ["Solvent", "Ligand"]
# Keep only the text before the " (n)" suffix, e.g. "DMSO (4)" -> "DMSO".
for column in categorical_columns:
    stripped = df[column].str.split(r" \(\d\)", expand=True)
    df[column] = stripped[0]
# Remove every column whose header contains "Unnamed".
df = df.drop([name for name in df.columns if "Unnamed" in name], axis=1)
# Shorten the headers to plain identifiers.
df = df.rename(
    columns={
        "ResT /min": "ResT",
        "Temp /°C": "Temp",
        "Mol%": "Mol",
        "Yield /%": "yld",
    }
)
ds = DataSet.from_df(df)
ds

Unnamed: 0,Data file,Type,Solvent,Ligand,ResT,Temp,Mol,yld,SM area,Product area,...,Volume /mL,Conc SM /M,Mol% catalyst,SM /g,Pd(OAc)2 /g,Ligand Mr,Ligand /g,NEt3 /mL,Biphenyl /g,Biphenyl /M
0,TrainingData001,Training,DMSO,DPEPhos,60,140,10,46.09,,3591526.0,...,5.0,0.1,10.0,0.1008,0.0112,538.5,0.05385,0.104545,0.031,0.04026
1,TrainingData002,Training,DMSO,JohnPhos,45,70,3,1.31,2704169.0,260938.0,...,5.0,0.1,3.0,0.1008,0.00336,298.4,0.008952,0.104545,0.0263,0.034156
2,TrainingData003,Training,Toluene,DPEPhos,42,61,8,0.0,,,...,5.0,0.1,8.0,0.1008,0.00896,538.5,0.04308,0.104545,0.0245,0.031818
3,TrainingData004,Training,DMA,JohnPhos,9,135,5,37.91,1333228.0,4837572.0,...,5.0,0.1,5.0,0.1008,0.0056,298.4,0.01492,0.104545,0.0276,0.035844
4,TrainingData005,Training,MeCN,JohnPhos,11,134,2,16.87,1614954.0,2408002.0,...,5.0,0.1,2.0,0.1008,0.00224,298.4,0.005968,0.104545,0.0265,0.034416
5,TrainingData006,Training,MeCN,JohnPhos,19,80,5,69.7,723374.0,16037972.0,...,5.0,0.1,5.0,0.1008,0.0056,298.4,0.01492,0.104545,0.026,0.033766
6,TrainingData007,Training,NMP,JohnPhos,47,143,4,40.92,,7980309.0,...,5.0,0.1,4.0,0.1008,0.00448,298.4,0.011936,0.104545,0.0302,0.039221
7,TrainingData008,Training,DMSO,SPhos,29,122,4,0.0,1001820.0,0.0,...,5.0,0.1,4.0,0.1008,0.00448,410.5,0.01642,0.104545,0.027,0.035065
8,TrainingData009,Training,NMP,Xphos,52,112,8,55.69,647744.0,10707780.0,...,5.0,0.1,8.0,0.1008,0.00896,476.7,0.038136,0.104545,0.028,0.036364
9,,Training,NMP,DPEPhos,26,53,6,0.0,,,...,5.0,0.1,6.0,0.1008,0.00672,,0.0,0.104545,,0.0


In [33]:
# Create domain
#
# Solvents: Toluene (1), DMA (2), MeCN (3), DMSO (4), NMP (5)
# Ligand: JohnPhos (1), Sphos (2), Xphos (3), DPEPhos (4)
# ResT: 5 - 60 mins
# Temp: 50 - 150 deg
# Mol%: 2 - 10 %
domain = Domain()

# Variables are added in this order: two categorical inputs, three continuous
# inputs, then the yield objective (maximized).
for variable in (
    CategoricalVariable(
        "Solvent",
        "Solvent used for the reaction",
        levels=["Toluene", "DMA", "MeCN", "DMSO", "NMP"],
    ),
    CategoricalVariable(
        "Ligand",
        "Ligand used for the reaction",
        levels=["JohnPhos", "SPhos", "Xphos", "DPEPhos"],
    ),
    ContinuousVariable("ResT", "Residence Time (minutes)", bounds=(5, 60)),
    ContinuousVariable("Temp", "Reaction temperature in deg C", bounds=(50, 150)),
    ContinuousVariable("Mol", "Catalyst mol percent", bounds=(2, 10)),
    ContinuousVariable(
        "yld",
        "Reaction yield",
        bounds=(0, 100),
        is_objective=True,
        maximize=True,
    ),
):
    domain += variable

print("Domain")
domain

Domain


0,1,2,3
Name,Type,Description,Values
Solvent,"categorical, input",Solvent used for the reaction,5 levels
Ligand,"categorical, input",Ligand used for the reaction,4 levels
ResT,"continuous, input",Residence Time (minutes),"[5,60]"
Temp,"continuous, input",Reaction temperature in deg C,"[50,150]"
Mol,"continuous, input",Catalyst mol percent,"[2,10]"
yld,"continuous, maximize objective",Reaction yield,"[0,100]"


In [34]:
# For every categorical input, record a {level name -> integer code} mapping and
# the position of that input within domain.input_variables.
cat_mappings = {}
cat_dimensions = []
for position, variable in enumerate(domain.input_variables):
    if variable.variable_type != "categorical":
        continue
    cat_mapping = dict(zip(variable.levels, range(len(variable.levels))))
    cat_mappings[variable.name] = cat_mapping
    cat_dimensions.append(position)

In [35]:
# All categorical combinations, with level names replaced by their integer codes.
combos = domain.get_categorical_combinations()
for variable in domain.input_variables:
    if variable.variable_type != "categorical":
        continue
    column = variable.name
    combos[column] = combos[column].replace(cat_mappings[column])

In [36]:
# Single-task BO with categorical_method=None and brute-force categorical handling.
strategy = NewSTBO(
    domain,
    acquisition_function="qNEI",
    categorical_method=None,
    brute_force_categorical=True,
)
# Request suggestions from the previous results and round them to whole numbers.
suggestions = strategy.suggest_experiments(
    int(num_experiments),
    prev_res=ds,
).round(0)
suggestions

Unnamed: 0,Solvent,Ligand,ResT,Temp,Mol,strategy
0,MeCN,JohnPhos,57.0,80.0,9.0,STBO


In [37]:
# Re-run the acquisition optimisation by hand for every categorical combination
# and collect the candidates, their acquisition values and the model's
# predicted yield into the DataFrame X.
from botorch.optim import optimize_acqf, optimize_acqf_mixed
inputs, output = strategy.transform.transform_inputs_outputs(
    ds,
    categorical_method=None,
    # standardize_inputs=True,
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
    # standardize_outputs=True,
)
# Integer-encode the categorical inputs and remember their column positions.
cat_mappings = {}
cat_dimensions = []
for i, v in enumerate(strategy.domain.input_variables):
    if v.variable_type == "categorical":
        cat_mapping = {l: i for i, l in enumerate(v.levels)}
        inputs[v.name] = inputs[v.name].replace(cat_mapping)
        cat_mappings[v.name] = cat_mapping
        cat_dimensions.append(i)
# One {input position -> encoded level} dict per categorical combination.
# combo is a Series indexed by column name, so the i-th entry is read
# positionally with .iloc (integer keys on a label index are deprecated).
fixed_features_list = []
for k, combo in combos.iterrows():
    fixed_features_list.append(
        {dim: combo.iloc[i] for i, dim in enumerate(cat_dimensions)}
    )

# Optimise the acquisition function once per combination with the categorical
# dimensions held fixed; return_best_only=False is passed to every call.
ff_candidate_list, ff_acq_value_list = [], []
for fixed_features in fixed_features_list:
    candidate, acq_value = optimize_acqf(
        acq_function=strategy.acq,
        bounds=strategy._get_bounds(),
        q=1,
        num_restarts=100,
        raw_samples=2000,
        fixed_features=fixed_features,
        return_best_only=False,
    )
    ff_candidate_list.append(candidate)
    ff_acq_value_list.append(acq_value)

ff_candidate_list = torch.cat(ff_candidate_list)
# ff_acq_values = torch.stack(ff_acq_value_list)
X = pd.DataFrame(
    ff_candidate_list.squeeze().numpy(), 
    columns=[v.name for v in strategy.domain.input_variables]
)
X = DataSet.from_df(X)


# Map the integer codes back to level names before undoing the scaling.
for i, v in enumerate(strategy.domain.input_variables):
    if v.variable_type == "categorical":
        cat_mapping = {i: l for i, l in enumerate(v.levels)}
        X[v.name] = X[v.name].replace(cat_mapping)

X = strategy.transform.un_transform(
    X,
    categorical_method=None,
    min_max_scale_inputs=True,
    min_max_scale_outputs=True,
)
X = pd.DataFrame(
    X.values,
    columns=[v.name for v in strategy.domain.input_variables]
)
# Acquisition value of every candidate, evaluated without tracking gradients.
with torch.no_grad():
    acq = strategy.acq(ff_candidate_list)
X["acq"] = acq

# Estimate the model's predictive mean and standard deviation at each
# candidate from 100 posterior samples.
with torch.no_grad():
    posterior = strategy.model.posterior(ff_candidate_list)
    samples = [posterior.sample() for _ in range(100)]

samples = torch.stack(samples).squeeze()
avg = samples.mean(axis=0)
std = samples.std(axis=0)

X["y_mean"] = avg
X["y_std"] = std

In [38]:
# Show the candidates in HiPlot, sorted by acquisition value (highest first).
import hiplot as hip
hexp = hip.Experiment.from_dataframe(X.sort_values("acq", ascending=False))
# Hide the "uid" column in the parallel plot.
hexp.display_data(hip.Displays.PARALLEL_PLOT).update({"hide":["uid"]})
hexp.display()

<IPython.core.display.Javascript object>

<hiplot.ipython.IPythonExperimentDisplayed at 0x13787e460>

In [39]:
# NewSTBO with only the domain passed, i.e. all other settings left at their defaults.
strategy = NewSTBO(domain)
suggestions = strategy.suggest_experiments(
    int(num_experiments),
    prev_res=ds,
).round(0)
suggestions

  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


Unnamed: 0,ResT,Temp,Mol,Solvent,Ligand,strategy
0,60.0,83.0,9.0,NMP,Xphos,STBO


In [40]:
# Optimise the acquisition function directly on the one-hot encoded inputs and
# collect candidates, acquisition values and predicted yield in X.
from botorch.optim import optimize_acqf, optimize_acqf_mixed
inputs, output = strategy.transform.transform_inputs_outputs(
    ds,
    categorical_method="one-hot",
    # standardize_inputs=True,
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
    # standardize_outputs=True,
)
# return_best_only=False is passed so the result is not reduced to one point.
candidates, acq_values= optimize_acqf(
    acq_function=strategy.acq,
    bounds=strategy._get_bounds(),
    num_restarts=100,
    q=1,
    raw_samples=2000,
    return_best_only=False
)

# Label the candidate columns with the transformed input column names, then
# undo the one-hot encoding and min-max scaling.
res = DataSet(candidates.squeeze(), columns=inputs.data_columns)
X = strategy.transform.un_transform(
    res,
    categorical_method="one-hot",
    min_max_scale_inputs=True,
    min_max_scale_outputs=True,
)

X = pd.DataFrame(
    X.values,
    columns=X.data_columns
)
# Acquisition value of every candidate, evaluated without tracking gradients.
with torch.no_grad():
    acq = strategy.acq(candidates)
X["acq"] = acq

# Estimate the predictive mean and standard deviation from 100 posterior samples.
with torch.no_grad():
    posterior = strategy.model.posterior(candidates)
    samples = [posterior.sample() for i in range(100)]

samples = torch.stack(samples).squeeze()
avg = samples.mean(axis=0)
std = samples.std(axis=0)

X["y_mean"] = avg
X["y_std"] = std

  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


In [41]:
# Show the candidates in HiPlot, sorted by acquisition value (highest first).
import hiplot as hip
hexp = hip.Experiment.from_dataframe(X.sort_values("acq", ascending=False))
# Hide the "uid" column in the parallel plot.
hexp.display_data(hip.Displays.PARALLEL_PLOT).update({"hide":["uid"]})
hexp.display()

<IPython.core.display.Javascript object>

<hiplot.ipython.IPythonExperimentDisplayed at 0x137a82370>

In [42]:
# Display the lengthscale parameter of the model's base kernel.
strategy.model.covar_module.base_kernel.lengthscale

tensor([[0.3344, 0.3346, 0.3328, 0.3313, 0.3313, 0.3340, 0.3347, 0.3339, 0.3321,
         0.3596, 0.3410, 0.3233]], dtype=torch.float64,
       grad_fn=<SoftplusBackward0>)

## Multitask Experiments B

In [4]:
# Load the experiments-A and experiments-B CSVs; skiprows=1 skips the first line of each file.
df_a = pd.read_csv("../data/experiments/ch_activation_experiments_a.csv", skiprows=1)
df_b = pd.read_csv("../data/experiments/ch_activation_experiments_b.csv", skiprows=1)

In [5]:
# Number of experiments to request from suggest_experiments in the cells below.
num_experiments = 1

In [12]:
# Display the experiments-A table.
df_a

Unnamed: 0,Data file,Type,Solvent,Ligand,ResT /min,Temp /°C,Mol%,Yield /%,SM area,Product area,...,Volume /mL,Conc SM /M,Mol% catalyst,SM /g,Pd(OAc)2 /g,Ligand Mr,Ligand /g,NEt3 /mL,Biphenyl /g,Biphenyl /M
0,TrainingData001,Training,DMSO (4),DPEPhos (4),60,140,10,46.09,,3591526.0,...,5.0,0.1,10.0,0.1008,0.0112,538.5,0.05385,0.104545,0.031,0.04026
1,TrainingData002,Training,DMSO (4),JohnPhos (1),45,70,3,1.31,2704169.0,260938.0,...,5.0,0.1,3.0,0.1008,0.00336,298.4,0.008952,0.104545,0.0263,0.034156
2,TrainingData003,Training,Toluene (1),DPEPhos (4),42,61,8,0.0,,,...,5.0,0.1,8.0,0.1008,0.00896,538.5,0.04308,0.104545,0.0245,0.031818
3,TrainingData004,Training,DMA (2),JohnPhos (1),9,135,5,37.91,1333228.0,4837572.0,...,5.0,0.1,5.0,0.1008,0.0056,298.4,0.01492,0.104545,0.0276,0.035844
4,TrainingData005,Training,MeCN (3),JohnPhos (1),11,134,2,16.87,1614954.0,2408002.0,...,5.0,0.1,2.0,0.1008,0.00224,298.4,0.005968,0.104545,0.0265,0.034416
5,TrainingData006,Training,MeCN (3),JohnPhos (1),19,80,5,69.7,723374.0,16037972.0,...,5.0,0.1,5.0,0.1008,0.0056,298.4,0.01492,0.104545,0.026,0.033766
6,TrainingData007,Training,NMP (5),JohnPhos (1),47,143,4,40.92,,7980309.0,...,5.0,0.1,4.0,0.1008,0.00448,298.4,0.011936,0.104545,0.0302,0.039221
7,TrainingData008,Training,DMSO (4),SPhos (2),29,122,4,0.0,1001820.0,0.0,...,5.0,0.1,4.0,0.1008,0.00448,410.5,0.01642,0.104545,0.027,0.035065
8,TrainingData009,Training,NMP (5),Xphos (3),52,112,8,55.69,647744.0,10707780.0,...,5.0,0.1,8.0,0.1008,0.00896,476.7,0.038136,0.104545,0.028,0.036364
9,,Training,NMP (5),DPEPhos (4),26,53,6,0.0,,,...,5.0,0.1,6.0,0.1008,0.00672,,0.0,0.104545,,0.0


### Generate Suggestions

In [6]:
# Transform data
def transform_data(df: pd.DataFrame):
    """Clean a raw experiment table and convert it to a summit ``DataSet``.

    The input frame is left untouched; all changes are made on a copy.

    Steps:
      1. In the "Solvent" and "Ligand" columns keep only the text before the
         " (n)" suffix (e.g. "DMSO (4)" -> "DMSO").
      2. Drop every column whose name contains "Unnamed".
      3. Rename "ResT /min", "Temp /°C", "Mol%" and "Yield /%" to
         "ResT", "Temp", "Mol" and "yld".

    Parameters
    ----------
    df : pandas.DataFrame
        Raw experiment table containing at least the "Solvent" and "Ligand"
        columns.

    Returns
    -------
    DataSet
        The result of ``DataSet.from_df`` on the cleaned table.
    """
    categorical_columns = ["Solvent", "Ligand"]
    # Copy first: assigning to df[col] below would otherwise modify the
    # caller's DataFrame as a side effect.
    df = df.copy()
    for col in categorical_columns:
        df[col] = df[col].str.split(r" \(\d\)", expand=True)[0]
    unnamed_columns = [col for col in df.columns if "Unnamed" in col]
    df = df.drop(columns=unnamed_columns)
    df = df.rename(columns={"ResT /min": "ResT", "Temp /°C": "Temp", "Mol%": "Mol", "Yield /%": "yld"})
    return DataSet.from_df(df)

ds_a = transform_data(df_a)
ds_b = transform_data(df_b)
# Tag each dataset with an integer task id stored in a METADATA column.
ds_a[("task", "METADATA")] = 0
ds_b[("task", "METADATA")] = 1

In [7]:
# Create domain
#
# Solvents: Toluene (1), DMA (2), MeCN (3), DMSO (4), NMP (5)
# Ligand: JohnPhos (1), Sphos (2), Xphos (3), DPEPhos (4)
# ResT: 5 - 60 mins
# Temp: 50 - 150 deg
# Mol%: 2 - 10 %
domain = Domain()

# Variables are added in this order: two categorical inputs, three continuous
# inputs, then the yield objective (maximized).
for variable in (
    CategoricalVariable(
        "Solvent",
        "Solvent used for the reaction",
        levels=["Toluene", "DMA", "MeCN", "DMSO", "NMP"],
    ),
    CategoricalVariable(
        "Ligand",
        "Ligand used for the reaction",
        levels=["JohnPhos", "SPhos", "Xphos", "DPEPhos"],
    ),
    ContinuousVariable("ResT", "Residence Time (minutes)", bounds=(5, 60)),
    ContinuousVariable("Temp", "Reaction temperature in deg C", bounds=(50, 150)),
    ContinuousVariable("Mol", "Catalyst mol percent", bounds=(2, 10)),
    ContinuousVariable(
        "yld",
        "Reaction yield",
        bounds=(0, 100),
        is_objective=True,
        maximize=True,
    ),
):
    domain += variable

print("Domain")
domain

Domain


0,1,2,3
Name,Type,Description,Values
Solvent,"categorical, input",Solvent used for the reaction,5 levels
Ligand,"categorical, input",Ligand used for the reaction,4 levels
ResT,"continuous, input",Residence Time (minutes),"[5,60]"
Temp,"continuous, input",Reaction temperature in deg C,"[50,150]"
Mol,"continuous, input",Catalyst mol percent,"[2,10]"
yld,"continuous, maximize objective",Reaction yield,"[0,100]"


In [8]:
# Remove the rows labelled 7 and 8 from ds_b. errors="ignore" keeps the cell safe
# to re-run: without it a second execution raises KeyError because the labels
# have already been dropped.
ds_b = ds_b.drop([7,8], axis=0, errors="ignore")

In [23]:
# Multitask BO: ds_a is passed as pretraining data and ds_b as the previous results.
strategy = NewMTBO(
    domain,
    pretraining_data=ds_a, 
    acquisition_function="qNEI",
    brute_force_categorical=True,
#     model_type=NewMTBO.LCM,
)

# Request suggestions and round them to whole numbers.
suggestions = strategy.suggest_experiments(int(num_experiments), prev_res=ds_b)
suggestions = suggestions.round(0)
suggestions

  data = self.all_experiments.append(self.pretraining_data)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


Unnamed: 0,ResT,Temp,Mol,Solvent,Ligand,strategy,task
0,22.0,97.0,5.0,MeCN,JohnPhos,MTBO,1


In [24]:
from botorch.optim import optimize_acqf
from botorch.acquisition.acquisition import AcquisitionFunction
import torch
from torch import Tensor
from typing import List, Dict, Optional, Callable, Any, Tuple, Union

def optimize_acqf_mixed(
    acq_function: AcquisitionFunction,
    bounds: Tensor,
    q: int,
    num_restarts: int,
    fixed_features_list: List[Dict[int, float]],
    raw_samples: Optional[int] = None,
    options: Optional[Dict[str, Union[bool, float, int, str]]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    post_processing_func: Optional[Callable[[Tensor], Tensor]] = None,
    batch_initial_conditions: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tuple[List[Tensor], Tensor]:
    """Optimise an acquisition function once per fixed-feature configuration.

    Notebook-local variant that shadows ``botorch.optim.optimize_acqf_mixed``.
    Instead of returning only the best point, it runs ``optimize_acqf`` with
    ``return_best_only=False`` for every entry of ``fixed_features_list`` and
    returns all results so they can be inspected.

    Args:
        acq_function: Acquisition function to optimise.
        bounds: Bounds tensor forwarded to ``optimize_acqf``.
        q: Number of candidates per call. Only ``q == 1`` is supported.
        num_restarts: Forwarded to ``optimize_acqf``.
        fixed_features_list: One ``{feature index: value}`` dict per
            configuration; each is forwarded as ``fixed_features``.
        raw_samples, options, inequality_constraints, equality_constraints,
        post_processing_func, batch_initial_conditions: Forwarded unchanged to
            ``optimize_acqf`` (``options`` defaults to ``{}``).
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        A tuple ``(candidates, acq_values)``: ``candidates`` is a list with
        one candidate tensor per fixed-feature configuration, and
        ``acq_values`` is the concatenation of the per-configuration
        acquisition values.

    Raises:
        NotImplementedError: If ``q != 1``.
        ValueError: If ``fixed_features_list`` is empty.
    """
    # Previously q != 1 fell through and returned None, which only failed
    # later when the caller unpacked the result.
    if q != 1:
        raise NotImplementedError(
            "This optimize_acqf_mixed variant only supports q == 1."
        )
    if not fixed_features_list:
        raise ValueError("fixed_features_list must not be empty.")

    ff_candidate_list, ff_acq_value_list = [], []
    for fixed_features in fixed_features_list:
        candidate, acq_value = optimize_acqf(
            acq_function=acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            options=options or {},
            inequality_constraints=inequality_constraints,
            equality_constraints=equality_constraints,
            fixed_features=fixed_features,
            post_processing_func=post_processing_func,
            batch_initial_conditions=batch_initial_conditions,
            return_best_only=False,
        )
        ff_candidate_list.append(candidate)
        ff_acq_value_list.append(acq_value)

    ff_acq_values = torch.cat(ff_acq_value_list)
    return ff_candidate_list, ff_acq_values


In [25]:
# from botorch.optim import optimize_acqf, optimize_acqf_mixed
# Transform the combined A + B data with one-hot encoding and min-max scaling.
inputs, output = strategy.transform.transform_inputs_outputs(
    ds_a.append(ds_b),
    categorical_method="one-hot",
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
)
# NOTE(review): presumably one fixed-feature dict per categorical combination;
# confirm against NewMTBO._get_fixed_features.
fixed_features_list = strategy._get_fixed_features()
# return_best_only is not a named parameter of the notebook-local
# optimize_acqf_mixed; it is absorbed by its **kwargs.
candidates, acq_values= optimize_acqf_mixed(
    acq_function=strategy.acq,
    bounds=strategy._get_bounds(),
    fixed_features_list=fixed_features_list,
    num_restarts=100,
    q=1,
    raw_samples=2000,
    return_best_only=False
)


  ds_a.append(ds_b),
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)


In [26]:
# Squeeze each per-configuration candidate tensor and concatenate them into one tensor.
candidates_stacked = torch.cat([c.squeeze() for c in candidates])

In [27]:
# Label the stacked candidates with the transformed input column names, then
# undo the one-hot encoding and min-max scaling.
res = DataSet(candidates_stacked, columns=inputs.data_columns)
X = strategy.transform.un_transform(
    res,
    categorical_method="one-hot",
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
)

X = pd.DataFrame(
    X.values,
    columns=X.data_columns
)
# with torch.no_grad():
#     acq = strategy.acq(candidates)
# Use the acquisition values returned by optimize_acqf_mixed.
X["acq"] = acq_values

# Estimate the predictive mean and standard deviation from 100 posterior samples.
with torch.no_grad():
    posterior = strategy.model.posterior(candidates_stacked)
    samples = [posterior.sample() for i in range(100)]

samples = torch.stack(samples).squeeze()
avg = samples.mean(axis=0)
std = samples.std(axis=0)

X["y_mean"] = avg
X["y_std"] = std

  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


In [28]:
# Show the candidates in HiPlot, sorted by acquisition value (highest first).
hexp = hip.Experiment.from_dataframe(X.sort_values("acq", ascending=False))
# Hide the "uid" column in the parallel plot.
hexp.display_data(hip.Displays.PARALLEL_PLOT).update({"hide":["uid"]})
hexp.display()

<IPython.core.display.Javascript object>

<hiplot.ipython.IPythonExperimentDisplayed at 0x13767b0a0>

In [29]:
# Display the lengthscale parameter of the model's base kernel.
strategy.model.covar_module.base_kernel.lengthscale

tensor([[0.3341, 0.3357, 0.3335, 0.3300, 0.3324, 0.3334, 0.3348, 0.3335, 0.3323,
         0.3652, 0.3811, 0.1933]], dtype=torch.float64,
       grad_fn=<SoftplusBackward0>)

### Test STBO

In [7]:
# Single-task BO using ds_b as the previous results, with brute-force categorical handling.
strategy = NewSTBO(domain, acquisition_function="qNEI", brute_force_categorical=True)

suggestions = strategy.suggest_experiments(
    int(num_experiments),
    prev_res=ds_b,
).round(0)
suggestions

  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)
  return torch.tensor(bounds).T.double()
  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


Unnamed: 0,ResT,Temp,Mol,Solvent,Ligand,strategy
0,28.0,102.0,7.0,MeCN,JohnPhos,STBO


In [8]:
from botorch.optim import optimize_acqf
from botorch.acquisition.acquisition import AcquisitionFunction
import torch
from torch import Tensor
from typing import List, Dict, Optional, Callable, Any, Tuple, Union

def optimize_acqf_mixed(
    acq_function: AcquisitionFunction,
    bounds: Tensor,
    q: int,
    num_restarts: int,
    fixed_features_list: List[Dict[int, float]],
    raw_samples: Optional[int] = None,
    options: Optional[Dict[str, Union[bool, float, int, str]]] = None,
    inequality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    equality_constraints: Optional[List[Tuple[Tensor, Tensor, float]]] = None,
    post_processing_func: Optional[Callable[[Tensor], Tensor]] = None,
    batch_initial_conditions: Optional[Tensor] = None,
    **kwargs: Any,
) -> Tuple[List[Tensor], Tensor]:
    """Optimise an acquisition function once per fixed-feature configuration.

    Notebook-local variant that shadows ``botorch.optim.optimize_acqf_mixed``.
    Instead of returning only the best point, it runs ``optimize_acqf`` with
    ``return_best_only=False`` for every entry of ``fixed_features_list`` and
    returns all results so they can be inspected.

    Args:
        acq_function: Acquisition function to optimise.
        bounds: Bounds tensor forwarded to ``optimize_acqf``.
        q: Number of candidates per call. Only ``q == 1`` is supported.
        num_restarts: Forwarded to ``optimize_acqf``.
        fixed_features_list: One ``{feature index: value}`` dict per
            configuration; each is forwarded as ``fixed_features``.
        raw_samples, options, inequality_constraints, equality_constraints,
        post_processing_func, batch_initial_conditions: Forwarded unchanged to
            ``optimize_acqf`` (``options`` defaults to ``{}``).
        **kwargs: Accepted for call compatibility and ignored.

    Returns:
        A tuple ``(candidates, acq_values)``: ``candidates`` is a list with
        one candidate tensor per fixed-feature configuration, and
        ``acq_values`` is the concatenation of the per-configuration
        acquisition values.

    Raises:
        NotImplementedError: If ``q != 1``.
        ValueError: If ``fixed_features_list`` is empty.
    """
    # Previously q != 1 fell through and returned None, which only failed
    # later when the caller unpacked the result.
    if q != 1:
        raise NotImplementedError(
            "This optimize_acqf_mixed variant only supports q == 1."
        )
    if not fixed_features_list:
        raise ValueError("fixed_features_list must not be empty.")

    ff_candidate_list, ff_acq_value_list = [], []
    for fixed_features in fixed_features_list:
        candidate, acq_value = optimize_acqf(
            acq_function=acq_function,
            bounds=bounds,
            q=q,
            num_restarts=num_restarts,
            raw_samples=raw_samples,
            options=options or {},
            inequality_constraints=inequality_constraints,
            equality_constraints=equality_constraints,
            fixed_features=fixed_features,
            post_processing_func=post_processing_func,
            batch_initial_conditions=batch_initial_conditions,
            return_best_only=False,
        )
        ff_candidate_list.append(candidate)
        ff_acq_value_list.append(acq_value)

    ff_acq_values = torch.cat(ff_acq_value_list)
    return ff_candidate_list, ff_acq_values

In [9]:
# from botorch.optim import optimize_acqf, optimize_acqf_mixed
# Transform ds_b with one-hot encoding and min-max scaling.
inputs, output = strategy.transform.transform_inputs_outputs(
    ds_b,
    categorical_method="one-hot",
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
)
# NOTE(review): presumably one fixed-feature dict per categorical combination;
# confirm against NewSTBO._get_fixed_features.
fixed_features_list = strategy._get_fixed_features()
# return_best_only is not a named parameter of the notebook-local
# optimize_acqf_mixed; it is absorbed by its **kwargs.
candidates, acq_values= optimize_acqf_mixed(
    acq_function=strategy.acq,
    bounds=strategy._get_bounds(),
    fixed_features_list=fixed_features_list,
    num_restarts=100,
    q=1,
    raw_samples=2000,
    return_best_only=False
)

  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)


In [11]:
# Squeeze each per-configuration candidate tensor and concatenate them into one tensor.
candidates_stacked = torch.cat([c.squeeze() for c in candidates])

In [12]:
# Label the stacked candidates with the transformed input column names, then
# undo the one-hot encoding and min-max scaling.
res = DataSet(candidates_stacked, columns=inputs.data_columns)
X = strategy.transform.un_transform(
    res,
    categorical_method="one-hot",
    min_max_scale_inputs=True,
    min_max_scale_outputs=True
)

X = pd.DataFrame(
    X.values,
    columns=X.data_columns
)
# Use the acquisition values returned by optimize_acqf_mixed.
X["acq"] = acq_values

# Estimate the predictive mean and standard deviation from 100 posterior samples.
with torch.no_grad():
    posterior = strategy.model.posterior(candidates_stacked)
    samples = [posterior.sample() for i in range(100)]

samples = torch.stack(samples).squeeze()
avg = samples.mean(axis=0)
std = samples.std(axis=0)

X["y_mean"] = avg
X["y_std"] = std

  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


In [13]:
# Show the candidates in HiPlot, sorted by acquisition value (highest first).
hexp = hip.Experiment.from_dataframe(X.sort_values("acq", ascending=False))
# Hide the "uid" column in the parallel plot.
hexp.display_data(hip.Displays.PARALLEL_PLOT).update({"hide":["uid"]})
hexp.display()

<IPython.core.display.Javascript object>

<hiplot.ipython.IPythonExperimentDisplayed at 0x16944c5b0>

## LCM Kernel

In [22]:
from multitask.mixed_gp_regression import LCMMultitaskGP

# Weighted MTBO

In [25]:
# Multitask BO with the "WeightedEI" acquisition function: ds_a is passed as
# pretraining data and ds_b as the previous results.
strategy = NewMTBO(
    domain,
    pretraining_data=ds_a, 
    acquisition_function="WeightedEI",
    brute_force_categorical=True,
#     model_type=NewMTBO.LCM,
)

# Request suggestions and round them to whole numbers.
suggestions = strategy.suggest_experiments(int(num_experiments), prev_res=ds_b)
suggestions = suggestions.round(0)
suggestions

  data = self.all_experiments.append(self.pretraining_data)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(variable.name, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)
  new_ds = new_ds.drop(one_hot_names, axis=1)


Unnamed: 0,ResT,Temp,Mol,Solvent,Ligand,strategy,task
0,25.0,81.0,4.0,MeCN,JohnPhos,MTBO,1
