In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import optuna

from bcmix import *

In [None]:
# (low, high) bounds handed to Optuna when it proposes an action `a`.
ACTION_RANGE = (-5.0, 5.0)
# Number of Optuna trials run for every action selection.
N_TRIALS = 100
# Number of sequential act/observe/update steps in one simulated run.
DATA_LEN = 8

In [None]:
# prior, stored as a canonical vector (2x1 zeros) and a precision matrix (2x2 identity)
canonical_0 = np.zeros((2, 1))
precision_0 = np.eye(2)
# the same prior as covariance / mean: covariance = inv(precision), mean = covariance @ canonical
covm_0 = np.linalg.inv(precision_0)
mean_0 = np.matmul(covm_0, canonical_0)

# true value (fixed by hand; the commented line draws it from the prior instead)
# alpha, beta = np.random.multivariate_normal(mean_0.flatten(), covm_0)
alpha = 1.8
beta = -2.4
print(alpha, beta, myopic(canonical_0, precision_0))

### Rollout

In [None]:
def objective(trial):
    """Optuna objective: score one candidate action under the current state.

    The action is sampled as the float parameter 'a' within ACTION_RANGE and
    scored by ``q_myopic_without_change``.

    Note: ``canonical_i``, ``precision_i``, ``alpha`` and ``beta`` are not
    arguments; they are looked up as notebook-level variables at call time, so
    they must be assigned before a study is optimized with this function.

    Args:
        trial: the Optuna trial used to sample the action.

    Returns:
        Whatever ``q_myopic_without_change`` returns for the sampled action.
    """
    low, high = ACTION_RANGE
    action = trial.suggest_float('a', low, high)
    return q_myopic_without_change(canonical_i, precision_i, action, alpha, beta)

In [None]:
for i in range(2):
    # initialize
    simresult_i = np.full((DATA_LEN * N_TRIALS, 7), np.nan)
    canonical_i, precision_i = canonical_0, precision_0
    for j in range(DATA_LEN):
        # current state
        covm_i = np.linalg.inv(precision_i)
        mean_i = covm_i @ canonical_i
        js = j * N_TRIALS
        je = j * N_TRIALS + N_TRIALS
        simresult_i[js : je, 0] = mean_i[0][0]
        simresult_i[js : je, 1] = mean_i[1][0]
        simresult_i[js : je, 2] = covm_i[0][0]
        simresult_i[js : je, 3] = covm_i[0][1]
        simresult_i[js : je, 4] = covm_i[1][1]
        # select action
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=N_TRIALS)
        simresult_i[js : je, 5] = [_.params['a'] for _ in study.trials]
        simresult_i[js : je, 6] = [_.value for _ in study.trials]
        a = study.best_trial.params['a']
        # update state
        y = env_response(a, alpha, beta)[0]
        canonical_i, precision_i = update_without_change(canonical_i, precision_i, a, y)
    np.save("simulations\rolloutnocg\simresult_" + str(i) + ".npy", simresult_i)

In [None]:
simresult_i = np.load("simulations\rolloutnocg\simresult_0.npy")

In [None]:
# plot Q values: the largest objective value (column 6) among the N_TRIALS
# trials recorded for each time step
q_best_by_step = []
for step in range(DATA_LEN):
    first_row = step * N_TRIALS
    q_best_by_step.append(simresult_i[first_row : first_row + N_TRIALS, 6].max())
plt.plot(q_best_by_step)

In [None]:
# plot alpha estimations: column 0 holds the first mean component; it is the
# same on every row of a step, so max() simply picks that value
alpha_by_step = []
for step in range(DATA_LEN):
    first_row = step * N_TRIALS
    alpha_by_step.append(simresult_i[first_row : first_row + N_TRIALS, 0].max())
plt.plot(alpha_by_step)

In [None]:
# plot beta estimations: column 1 holds the second mean component; it is the
# same on every row of a step, so max() simply picks that value
beta_by_step = []
for step in range(DATA_LEN):
    first_row = step * N_TRIALS
    beta_by_step.append(simresult_i[first_row : first_row + N_TRIALS, 1].max())
plt.plot(beta_by_step)