In [None]:
import multiprocessing
import os
import re
import uuid
from io import StringIO

import pandas as pd
import numpy as np

from emukit.core import ParameterSpace, ContinuousParameter, DiscreteParameter
from emukit.core.initial_designs import RandomDesign
from GPy.models import GPRegression
from emukit.model_wrappers import GPyModelWrapper
from emukit.bayesian_optimization.acquisitions import ExpectedImprovement
from emukit.bayesian_optimization.loops import BayesianOptimizationLoop

In [None]:
# Base name shared by the FRED template file and every generated artefact.
target_name = 'SEIZ_updated'

# Upper reference for the skeptic/adopter split: `sample` passes
# init_exposures - init_skeptics as the initial adopter count.
init_exposures = 100

# Single-run mode: the machine's CPU count is handed to fred_job as its thread count.
# Disabled multi-run alternative:
#   n_runs = 5
#   max_parallel_runs = 8
#   threads_per_run = multiprocessing.cpu_count()//min(n_runs,max_parallel_runs)
threads_per_run = multiprocessing.cpu_count()

parameter_space = ParameterSpace([
    ContinuousParameter('p', 0, 1),
    ContinuousParameter('l', 0, 1),
    # Lower and upper bound coincide, so epsilon is pinned to 2.
    ContinuousParameter('epsilon', 2, 2),
    # Integer candidates 1 .. init_exposures - 2 (np.arange excludes the stop value).
    DiscreteParameter(
        'prop_init_skeptics',
        np.arange(1, init_exposures - 1, dtype=np.int_),
    ),
])

In [None]:
def run_job(p, l, epsilon, init_skeptics, init_adopters, clear_results=False):
    with open(f'{target_name}_editable.fred', 'r') as file:
        filedata = file.read()

    vars = {
        '{p}': p, 
        '{l}': l, 
        '{epsilon}': epsilon, 
        '{init_skeptic}': init_skeptics, 
        '{init_adopt}': init_adopters
    }

    run_uuid = uuid.uuid4()

    for target, value in vars.items():
        filedata = filedata.replace(target, str(value))

    with open(f'out/{target_name}_{run_uuid}_out.fred', 'w') as file:
        file.write(filedata)
        
    if clear_results:
        !yes | fred_clear_all_results
    
    # !fred_job -p {target_name}_{run_uuid}_out.fred -k {target_name}_{p}_{l}_{epsilon}_run -n {n_runs} -m {max_parallel_runs} -t {threads_per_run}
    !fred_job -p out/{target_name}_{run_uuid}_out.fred -k {target_name}_{run_uuid}_run -t {threads_per_run}
    !fred_csv -k {target_name}_{run_uuid}_run > results/{target_name}_{run_uuid}_run.csv

    with open(f'results/{target_name}_{run_uuid}_run.csv', 'r') as file:
        lines = file.readlines()

    with open(f'results/{target_name}_{run_uuid}_run.csv', 'w') as file:
        lines[0] = ', '.join(map(lambda item: f'{item[0][1:-1]}={item[1]}', vars.items()))+'\n'
        file.writelines(lines)

    # with open(f'results/{target_name}_{p}_{l}_{epsilon}_run.csv', 'r') as csv:
    #     results = csv.read()
    # 
    # results = re.split('Run,\d\n', results)[1:]
    # return [pd.read_csv(StringIO(data)) for data in results]
    return pd.read_csv(f'results/{target_name}_{run_uuid}_run.csv', header=2)

In [None]:
def sample(X, **kwargs):
    """Run one FRED job per parameter row of ``X`` and collect the objective values.

    Args:
        X: a single parameter vector or a 2-D array-like of them; each row is
            unpacked as ``(p, l, epsilon, init_skeptics)``.
        **kwargs: forwarded unchanged to every ``run_job`` call.

    Returns:
        Array of shape ``(n_rows, 1)`` holding, for each row, the last value of
        the ``ADOPT.I`` column returned by ``run_job``.

    Raises:
        ValueError: if ``X`` has more than two dimensions, or if its row length
            differs from the number of parameters in ``parameter_space``.
    """
    X = np.array(X)

    if X.ndim < 2:
        # Promote a single parameter vector to a one-row batch.
        X = np.expand_dims(X, 0)
    elif X.ndim > 2:
        raise ValueError(f'X has too many dimensions (ndim={X.ndim}, must be 1 or 2)')

    n_variables = len(parameter_space.parameters)
    if X.shape[-1] != n_variables:
        raise ValueError(f'X has the wrong number of variables (variables={X.shape[-1]}, must be {n_variables})')

    outcomes = []

    # NOTE(review): rows presumably arrive as floats, so init_skeptics would be
    # substituted into the template as e.g. "37.0" - confirm FRED accepts that.
    for p, l, epsilon, init_skeptics in X:  # TODO: parallelize this?
        data = run_job(p, l, epsilon, init_skeptics, init_exposures - init_skeptics, **kwargs)
        outcomes.append([data['ADOPT.I'].iat[-1]])

    if not outcomes:
        # np.vstack rejects an empty list; return an empty column instead.
        return np.empty((0, 1))

    return np.vstack(outcomes)

In [None]:
# Initial design: draw `num_data_points` samples from the parameter space.
num_data_points = 5
design = RandomDesign(parameter_space)
X = design.get_samples(num_data_points)

In [None]:
# Display the sampled initial design points.
X

In [None]:
# Evaluate the initial design points with FRED.
# NOTE(review): clear_results=True is forwarded by `sample` to every `run_job`
# call, so fred_clear_all_results runs before each initial job rather than
# once - confirm that is intended.
Y = sample(X, clear_results=True)

In [None]:
# Display the objective values obtained for the initial design.
Y

In [None]:
# Build a GPy GP regression model on the initial (X, Y) data and wrap it
# in emukit's GPy model wrapper for use by the optimisation loop.
model_gpy = GPRegression(X,Y)
model_emukit = GPyModelWrapper(model_gpy)

In [None]:
# Acquisition function for the loop: expected improvement under the wrapped model.
expected_improvement = ExpectedImprovement(model=model_emukit)

In [None]:
# Assemble the Bayesian optimisation loop from the search space, the
# acquisition function and the wrapped model, with a batch size of one.
bayesopt_loop = BayesianOptimizationLoop(
    space=parameter_space,
    acquisition=expected_improvement,
    model=model_emukit,
    batch_size=1,
)

In [None]:
# Run the loop with `sample` as the objective function and `max_iterations`
# as the stopping argument; each evaluation launches a FRED job via `run_job`.
max_iterations = 25
bayesopt_loop.run_loop(sample, max_iterations)

In [None]:
# Collect the outcome of the finished optimisation loop.
results = bayesopt_loop.get_results()

In [None]:
# Report the minimum location and value, then the best value found per iteration.
print(results.minimum_location, results.minimum_value)
print(results.best_found_value_per_iteration)