# Bayesian Optimization with BoTorch

In [None]:
import os
import pickle
import subprocess
import time

import torch
import yaml
from botorch.acquisition import ExpectedImprovement
from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

from bash_config import write_bash  # Provided in repository
import darkfield.mmmUtils_v2 as mu

# Default for submit_job's ``n_sim_points`` argument, which is forwarded to
# write_bash as ``N``.
N_SIM_POINTS = 1000


def generate_yaml(template_path, output_path, param_value, base_index=1):
    """Write a simulation config derived from a YAML template.

    The template is loaded, its ``O2`` section is updated from
    ``param_value`` and the result is saved as ``BO_<base_index>.yaml``
    inside ``output_path``.

    Args:
        template_path: Path of the YAML template to load.
        output_path: Directory that receives the generated file.
        param_value: Value for the ``O2`` element. ``0`` sets ``O2.in`` to 0
            and leaves ``O2.size`` as it is in the template; any other value
            sets ``O2.in`` to 1 and ``O2.size`` to ``param_value * 1e-6``.
        base_index: Index used in the generated file name.

    Returns:
        Tuple ``(filename, fullpath)`` of the generated YAML file.
    """
    with open(template_path) as template_file:
        config = yaml.safe_load(template_file)

    o2_section = config['O2']
    o2_enabled = not (param_value == 0)
    o2_section['in'] = 1 if o2_enabled else 0
    if o2_enabled:
        # The size is stored scaled by 1e-6 relative to the optimiser's value.
        o2_section['size'] = float(param_value * 1e-6)

    filename = f"BO_{base_index}.yaml"
    fullpath = os.path.join(output_path, filename)
    with open(fullpath, 'w') as yaml_file:
        # sort_keys=False keeps the key order of the loaded template.
        yaml.dump(config, yaml_file, sort_keys=False)

    return filename, fullpath


def submit_job(yaml_filename, sim_index, n_sim_points=N_SIM_POINTS, n_cpus=24, mem='600GB'):
    """Generate a batch script for a YAML config and submit it with ``sbatch``.

    Args:
        yaml_filename: Name of the YAML config, passed to ``write_bash`` under
            the ``'yaml'`` key.
        sim_index: Currently unused; kept so existing callers keep working.
        n_sim_points: Passed to ``write_bash`` as ``N``.
        n_cpus: Passed to ``write_bash`` under the ``'n_cpus'`` key.
        mem: Passed to ``write_bash`` under the ``'mem'`` key.

    Raises:
        subprocess.CalledProcessError: If ``sbatch`` exits with a non-zero
            status.
        FileNotFoundError: If the ``sbatch`` executable cannot be found.
    """
    bash_dir = '/home/yu79deg/darkfield_p5438/bash'
    upd_params = {'n_cpus': n_cpus, 'mem': mem, 'yaml': yaml_filename}
    # NOTE(review): assumes write_bash returns the path of the script it
    # wrote -- confirm against bash_config.
    bash_path = write_bash(path=bash_dir, N=n_sim_points, upd_params=upd_params)

    # Pass an argument list (no shell) and fail loudly: a rejected submission
    # would otherwise only show up once the caller's polling loop times out.
    subprocess.run(['sbatch', str(bash_path)], check=True)


def wait_for_completion(jobname, timeout=3600, check_interval=60):
    """Poll until the result pickle of a job exists.

    Args:
        jobname: Job name; the file polled for is ``<jobname>_res.pickle`` in
            the pickles directory.
        timeout: Upper limit on the accumulated sleep time, in seconds.
        check_interval: Seconds to sleep between two existence checks.

    Returns:
        Path of the result pickle.

    Raises:
        TimeoutError: If the file still does not exist once the accumulated
            sleep time has reached ``timeout``.
    """
    result_path = f'/home/yu79deg/darkfield_p5438/Aime/pickles/{jobname}_res.pickle'

    elapsed = 0
    while elapsed < timeout:
        if os.path.exists(result_path):
            break
        print(f'Waiting for job {jobname} to complete...')
        time.sleep(check_interval)
        elapsed += check_interval

    # Final check also covers a file that appeared during the last sleep.
    if os.path.exists(result_path):
        return result_path
    raise TimeoutError(f'Job {jobname} did not complete in time.')


def extract_shadow_factor(pickle_path):
    """Return the ``roi2`` / ``TCC`` intensity ratio from a result pickle.

    Args:
        pickle_path: Path of the pickle, loaded with ``mu.loadPickle``.

    Returns:
        ``intensities['roi2'] / intensities['TCC']``, where ``intensities`` is
        read from the element at index 1 of the loaded object.
    """
    loaded = mu.loadPickle(pickle_path)
    intensities = loaded[1]['intensities']
    return intensities['roi2'] / intensities['TCC']


def objective_function(O2_value, template_path, output_path, base_index):
    """Evaluate the objective for one ``O2`` value by running a simulation job.

    Generates the YAML config, submits the job, blocks until its result
    pickle exists and converts the extracted shadow factor into the value to
    maximise.

    Args:
        O2_value: Parameter value forwarded to ``generate_yaml``.
        template_path: Path of the YAML template.
        output_path: Directory that receives the generated YAML file.
        base_index: Index used for the YAML file name and passed to
            ``submit_job`` as ``sim_index``.

    Returns:
        One-element tensor holding ``-log10(shadow_factor)``.
    """
    yaml_filename = generate_yaml(template_path, output_path, O2_value, base_index)[0]
    # The job name is the YAML file name without its extension.
    jobname, _extension = os.path.splitext(yaml_filename)

    submit_job(yaml_filename, sim_index=base_index)
    shadow_factor = extract_shadow_factor(wait_for_completion(jobname))

    log_shadow = torch.log10(torch.tensor([shadow_factor]))
    return -log_shadow


In [None]:
# Search bounds for the single optimised parameter as a 2 x 1 tensor:
# row 0 is the lower bound and row 1 the upper bound (botorch_optimization
# reads them as bounds[0] and bounds[1]). The sampled values reach
# generate_yaml as ``param_value``, where they are multiplied by 1e-6.
bounds = torch.tensor([[0.0], [240.0]])

def botorch_optimization(
    n_iters=10,
    n_init=3,
    template_path='/home/yu79deg/darkfield_p5438/yamls/BO_template.yaml',
    output_path='/home/yu79deg/darkfield_p5438/yamls',
):
    """Maximise ``objective_function`` over ``bounds`` with GP-based BO.

    ``n_init`` points are drawn uniformly at random inside ``bounds`` and
    evaluated first. Each following step fits a ``SingleTaskGP`` to all
    observations, maximises Expected Improvement and evaluates the proposed
    candidate. Evaluations are numbered from 1 and that number is passed to
    ``objective_function`` as ``base_index``.

    Args:
        n_iters: Total number of objective evaluations, including the
            ``n_init`` initial ones.
        n_init: Number of initial random evaluations; must be at least 1.
        template_path: YAML template forwarded to ``objective_function``.
        output_path: Directory for generated YAML files, forwarded to
            ``objective_function``.

    Returns:
        Tuple ``(train_x, train_y)`` of double-precision tensors with one row
        per evaluation and a single column each.

    Raises:
        ValueError: If ``n_init`` is smaller than 1.
    """
    if n_init < 1:
        raise ValueError('n_init must be at least 1 to fit the initial model.')

    # BoTorch recommends double precision for GP fitting, so every tensor
    # handed to the model and the optimiser is cast to torch.double.
    opt_bounds = bounds.to(dtype=torch.double)
    lower, upper = opt_bounds[0], opt_bounds[1]

    def evaluate(x_value, index):
        """Run one evaluation and return it as a 1 x 1 double tensor."""
        y = objective_function(x_value, template_path, output_path, index)
        # objective_function returns a 1-element 1-D tensor; reshape it so it
        # can be concatenated row-wise with the n x 1 train_y.
        return y.to(dtype=torch.double).reshape(1, 1)

    train_x = lower + (upper - lower) * torch.rand(n_init, 1, dtype=torch.double)
    train_y = torch.cat([
        evaluate(x.item(), i + 1) for i, x in enumerate(train_x)
    ])

    for iteration in range(n_init, n_iters):
        model = SingleTaskGP(train_x, train_y)
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_mll(mll)

        acquisition = ExpectedImprovement(model, best_f=train_y.max())
        # optimize_acqf expects bounds of shape 2 x d, which `bounds` already
        # has; it must not be transposed.
        candidate, _ = optimize_acqf(
            acquisition,
            bounds=opt_bounds,
            q=1,
            num_restarts=5,
            raw_samples=20,
        )

        new_x = candidate.detach()  # shape 1 x 1 (q x d)
        new_y = evaluate(new_x.item(), iteration + 1)
        train_x = torch.cat([train_x, new_x])
        train_y = torch.cat([train_y, new_y])

    return train_x, train_y

# Run the optimisation: 10 objective evaluations in total, the first 3 at
# random points. Every evaluation submits a job and blocks until its result
# file exists.
results_x, results_y = botorch_optimization(n_iters=10, n_init=3)


In [None]:
import pandas as pd

# Tabulate every evaluated point and save the table as CSV.
# reshape(-1) always yields a 1-D array; squeeze() would collapse a single-row
# result to a 0-d array, which pd.DataFrame cannot turn into a column.
df = pd.DataFrame({
    'O2_size': results_x.reshape(-1).numpy(),
    'neg_log10_shadow': results_y.reshape(-1).numpy(),
})
# Invert the objective's -log10 transform to recover the shadow factor.
df['shadow_factor'] = 10 ** (-df['neg_log10_shadow'])
df.to_csv('botorch_results.csv', index=False)
