In [None]:
import numpy as np
import optuna
import copy
import matplotlib.pyplot as plt
from bcmix import *

In [None]:
# Lower/upper bounds for the action: used as the optuna search range for 'a'
# and as the uniform sampling range when simulating data.
ACTION_RANGE = (-5.0, 5.0)
# Number of optuna trials per decision step; also the number of result rows
# reserved for each step in the simulation result arrays.
N_TRIALS = 5
# Number of decision steps in a rollout, and number of simulated data points.
DATA_LEN = 8
# Passed as `p=` to q_myopic_with_change, env_response and update_with_change.
# NOTE(review): presumably the per-step change probability — confirm in bcmix.
P = 0.025

In [None]:
# Ground truth: initial coefficients, plus the mean/covariance that are handed
# to the environment and to the change-aware objective as mean_true / covm_true.
alpha, beta = 1.8, -2.4
mean_true = np.zeros((2, 1))
covm_true = 2.0 * np.eye(2)

# Prior in canonical form: canonical vector, precision matrix and the matching
# log-constant (log|precision| - canonical' precision^{-1} canonical) / 2.
canonical_0 = np.zeros((2, 1))
precision_0 = np.eye(2)
logcon_0 = 0.5 * (
    np.linalg.slogdet(precision_0)[1]
    - (canonical_0.T @ np.linalg.inv(precision_0) @ canonical_0).item()
)
# Initial mixture: a single component under key 0.
states = {0: dict(can=canonical_0, pre=precision_0, log=logcon_0, pit=0.0)}

print(alpha, beta, myopic(canonical_0, precision_0))

### Rollout

In [None]:
def objective_bcmix(trial):
    """Optuna objective for the bcmix (change-aware) model.

    Samples an action 'a' within ACTION_RANGE and returns the value of
    q_myopic_with_change for it. Reads the notebook globals states_i, alpha_i,
    beta_i, mean_true, covm_true and P, so it must be called only after the
    rollout loop has assigned the current per-step state.
    """
    low, high = ACTION_RANGE[0], ACTION_RANGE[1]
    action = trial.suggest_float('a', low, high)
    return q_myopic_with_change(
        states_i, action, alpha_i, beta_i, mean_true, covm_true, p=P
    )

def objective(trial):
    """Optuna objective for the old (no-change) model.

    Samples an action 'a' within ACTION_RANGE and returns the value of
    q_myopic_without_change for it. Reads the notebook globals canonical_i,
    precision_i, alpha_i and beta_i, so it must be called only after the
    rollout loop has assigned the current per-step state.
    """
    low, high = ACTION_RANGE[0], ACTION_RANGE[1]
    action = trial.suggest_float('a', low, high)
    return q_myopic_without_change(canonical_i, precision_i, action, alpha_i, beta_i)

In [None]:
for i in range(1):
    # One simulation run. For each of DATA_LEN steps both models pick an action
    # with optuna (N_TRIALS trials each), the environment responds, and each
    # model's belief state is updated. Every step occupies N_TRIALS rows in the
    # result arrays (one row per optuna trial).
    #
    # simresult_bcmix_i columns:
    #   0, 1                 alpha_i, beta_i in effect at the step
    #   6*m + 2 .. 6*m + 7   for the state with key m: mean[0], mean[1],
    #                        cov[0][0], cov[0][1], cov[1][1], "pit"
    #   -2, -1               trial action, trial objective value
    # simresult_i columns:
    #   0, 1                 alpha_i, beta_i
    #   2, 3                 posterior mean
    #   4, 5, 6              cov[0][0], cov[0][1], cov[1][1]
    #   7, 8                 trial action, trial objective value
    #
    # NOTE(review): M1 and M2 are not defined in this notebook; presumably they
    # come from `from bcmix import *` — confirm.
    simresult_bcmix_i = np.full((DATA_LEN * N_TRIALS, 6 * (1 + M1 + M2) + 4), 0.0)
    simresult_i = np.full((DATA_LEN * N_TRIALS, 9), np.nan)
    alpha_i, beta_i = alpha, beta
    canonical_i, precision_i, states_i = canonical_0, precision_0, copy.deepcopy(states)
    for j in range(DATA_LEN):
        # Row range reserved for step j.
        js = j * N_TRIALS
        je = js + N_TRIALS

        # --- record current state ---
        # bcmix model (alpha/beta do not depend on the state, so write them once)
        simresult_bcmix_i[js : je, 0] = alpha_i
        simresult_bcmix_i[js : je, 1] = beta_i
        for m, s in states_i.items():
            # Convert canonical form (precision, canonical vector) to moments.
            covm_bcmix_i = np.linalg.inv(s["pre"])
            mean_bcmix_i = covm_bcmix_i @ s["can"]
            simresult_bcmix_i[js : je, m * 6 + 2] = mean_bcmix_i[0][0]
            simresult_bcmix_i[js : je, m * 6 + 3] = mean_bcmix_i[1][0]
            simresult_bcmix_i[js : je, m * 6 + 4] = covm_bcmix_i[0][0]
            simresult_bcmix_i[js : je, m * 6 + 5] = covm_bcmix_i[0][1]
            simresult_bcmix_i[js : je, m * 6 + 6] = covm_bcmix_i[1][1]
            simresult_bcmix_i[js : je, m * 6 + 7] = s["pit"]
        # old model
        simresult_i[js : je, 0] = alpha_i
        simresult_i[js : je, 1] = beta_i
        covm_i = np.linalg.inv(precision_i)
        mean_i = covm_i @ canonical_i
        simresult_i[js : je, 2] = mean_i[0][0]
        simresult_i[js : je, 3] = mean_i[1][0]
        simresult_i[js : je, 4] = covm_i[0][0]
        simresult_i[js : je, 5] = covm_i[0][1]
        simresult_i[js : je, 6] = covm_i[1][1]

        # --- select action ---
        # bcmix model
        study_bcmix = optuna.create_study(direction="maximize")
        study_bcmix.optimize(objective_bcmix, n_trials=N_TRIALS)
        simresult_bcmix_i[js : je, -2] = [t.params['a'] for t in study_bcmix.trials]
        simresult_bcmix_i[js : je, -1] = [t.value for t in study_bcmix.trials]
        a_bcmix = study_bcmix.best_trial.params['a']
        # old model
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=N_TRIALS)
        simresult_i[js : je, 7] = [t.params['a'] for t in study.trials]
        # Trial values go in column 8; writing them to column 7 would overwrite
        # the actions stored on the previous line and leave column 8 all-NaN.
        simresult_i[js : je, 8] = [t.value for t in study.trials]
        a = study.best_trial.params['a']

        # --- update state ---
        # bcmix model: env_response also returns the alpha/beta to carry forward.
        y_bcmix, alpha_i, beta_i = env_response(a_bcmix, alpha_i, beta_i, mean_true, covm_true, p=P)
        states_i = update_with_change(states_i, a_bcmix, y_bcmix, p=P)
        # old model
        # NOTE(review): this call uses alpha_i/beta_i as already updated by the
        # bcmix env_response call above, and relies on env_response's defaults
        # for the remaining arguments — confirm that is intended.
        y = env_response(a, alpha_i, beta_i)[0]
        canonical_i, precision_i = update_without_change(canonical_i, precision_i, a, y)
    # Saving is currently disabled. If re-enabled, use forward slashes (or a raw
    # string): "\s" in a normal string literal is an invalid escape sequence.
    #np.save("simulations\sim_cgbomix_" + str(i) + ".npy", simresult_bcmix_i)
    #np.save("simulations\sim_cgold_" + str(i) + ".npy", simresult_i)

### Simulate data

In [None]:
# Draw DATA_LEN actions uniformly over the allowed action range.
xs = np.random.uniform(low=ACTION_RANGE[0], high=ACTION_RANGE[1], size=DATA_LEN)

# First half of the responses is generated with the original coefficients.
ys = [env_response(x, alpha, beta)[0] for x in xs[: DATA_LEN // 2]]

# Draw replacement coefficients, then generate the second half with them.
alpha_new, beta_new = np.random.multivariate_normal(mean_true.ravel(), covm_true)
print(alpha_new, beta_new)
ys.extend(env_response(x, alpha_new, beta_new)[0] for x in xs[DATA_LEN // 2 :])
ys = np.array(ys)