In [None]:
import copy
import optuna
import numpy as np
import matplotlib.pyplot as plt
from bcmix import *

In [None]:
# Number of time steps j in every simulated sequence.
DATA_LEN = 81
# Number of optuna trials spent on each action search.
N_TRIALS = 25
# Passed as the last argument to the change-aware helpers
# (q_myopic_with_change, update_with_change, myopic_mix);
# presumably the per-step change-point probability -- TODO confirm.
P = 0.025
# (lower, upper) bounds of the action: used as the optuna search interval
# and as the open interval checked by valid_param.
ACTION_RANGE = (-5.0, 5.0)

In [None]:
# Ground truth: the starting (alpha, beta) pair, and the mean / covariance
# that replacement parameters are sampled from whenever a change occurs.
alpha, beta = -1.8, 2.2
mean_true = np.array([4.0, 2.0]).reshape(2, 1)
covm_true = np.eye(2)

# Prior in canonical form: canonical vector and precision matrix.
canonical_0 = np.array([4.0, 2.0]).reshape(2, 1)
precision_0 = np.eye(2)

# Log constant of the prior: (log|precision| - canonical' precision^-1 canonical) / 2.
_logdet_0 = np.linalg.slogdet(precision_0)[1]
_quad_0 = (canonical_0.T @ np.linalg.inv(precision_0) @ canonical_0).item()
logcon_0 = (_logdet_0 - _quad_0) / 2

# Initial collection of states, keyed by integer index; it starts with the
# prior alone.  "pit" starts at 0.0 (its meaning is defined by bcmix --
# TODO confirm).
states = {
    0: {
        "can": canonical_0,
        "pre": precision_0,
        "log": logcon_0,
        "pit": 0.0,
    }
}

# Show the true starting parameters next to the myopic action under the prior.
print(alpha, beta, myopic(canonical_0, precision_0))

### Rollout

In [None]:
def objective_bcmix(trial):
    """Optuna objective for the change-aware (bcmix) model.

    Samples a candidate action 'a' inside ACTION_RANGE and scores it with
    q_myopic_with_change.  The current simulation state is read from the
    notebook globals states_i, alpha_i and beta_i, so this must only be
    called while the simulation loop has them set.

    Returns the value produced by q_myopic_with_change for that action.
    """
    low, high = ACTION_RANGE
    action = trial.suggest_float('a', low, high)
    return q_myopic_with_change(states_i, action, alpha_i, beta_i, mean_true, covm_true, P)

def objective(trial):
    """Optuna objective for the model without change points ("old" model).

    Samples a candidate action 'a' inside ACTION_RANGE and scores it with
    q_myopic_without_change.  The current simulation state is read from the
    notebook globals canonical_i, precision_i, alpha_i and beta_i.

    Returns the value produced by q_myopic_without_change for that action.
    """
    low, high = ACTION_RANGE
    action = trial.suggest_float('a', low, high)
    return q_myopic_without_change(canonical_i, precision_i, action, alpha_i, beta_i)

In [None]:
def valid_param(alpha_new, beta_new):
    """Tell whether a candidate parameter pair keeps the optimal action in range.

    The optimal action is computed as
    (W * XSTAR + beta_new * YSTAR - alpha_new * beta_new) / (beta_new ** 2 + W),
    where W, XSTAR and YSTAR are names taken from the notebook namespace.

    Returns a truthy value only when that action lies strictly between the
    two bounds of ACTION_RANGE.
    """
    numerator = W * XSTAR + beta_new * YSTAR - alpha_new * beta_new
    denominator = beta_new ** 2 + W
    optimal_a = numerator / denominator
    lower, upper = ACTION_RANGE
    return lower < optimal_a < upper

In [None]:
for i in range(1):
    # initialize
    # simresult_bcmix_i columns: 0-1 = (alpha_i, beta_i); m*6+2 .. m*6+7 = six
    # summaries of state m (two means, three covariance entries, "pit");
    # -3, -2, -1 = chosen action, best objective value, realized reward.
    simresult_bcmix_i = np.full((DATA_LEN, 6 * (1 + M1 + M2) + 5), 0.0)
    # simresult_i columns: 0-1 = (alpha_i, beta_i); 2-6 = mean and covariance
    # entries of the old model; 7 = action; 8 = best objective value;
    # 9 = observed response y.
    simresult_i = np.full((DATA_LEN, 10), np.nan)
    alpha_i, beta_i = alpha, beta
    # Work on copies so the module-level prior arrays can never be altered
    # through these names, whatever the update helpers do internally.
    canonical_i, precision_i = canonical_0.copy(), precision_0.copy()
    states_i = copy.deepcopy(states)
    # change point locations: either one change or two, drawn uniformly from
    # fixed windows (upper bounds of randint are exclusive)
    n_cp = np.random.randint(1, 3)
    if n_cp == 2:
        cp_j = [np.random.randint(5, 31), np.random.randint(50, 76)]
    else:
        cp_j = [np.random.randint(10, 71)]
    # simulate sequence
    for j in range(DATA_LEN):
        # current state
        # bcmix model
        # the true parameters do not depend on the state index, so record
        # them once per step instead of once per state
        simresult_bcmix_i[j, 0] = alpha_i
        simresult_bcmix_i[j, 1] = beta_i
        for m, s in states_i.items():
            # convert canonical form (can, pre) into mean / covariance
            covm_bcmix_i = np.linalg.inv(s["pre"])
            mean_bcmix_i = covm_bcmix_i @ s["can"]
            base = m * 6
            simresult_bcmix_i[j, base + 2] = mean_bcmix_i[0][0]
            simresult_bcmix_i[j, base + 3] = mean_bcmix_i[1][0]
            simresult_bcmix_i[j, base + 4] = covm_bcmix_i[0][0]
            simresult_bcmix_i[j, base + 5] = covm_bcmix_i[0][1]
            simresult_bcmix_i[j, base + 6] = covm_bcmix_i[1][1]
            simresult_bcmix_i[j, base + 7] = s["pit"]
        # old model
        simresult_i[j, 0] = alpha_i
        simresult_i[j, 1] = beta_i
        covm_i = np.linalg.inv(precision_i)
        mean_i = covm_i @ canonical_i
        simresult_i[j, 2] = mean_i[0][0]
        simresult_i[j, 3] = mean_i[1][0]
        simresult_i[j, 4] = covm_i[0][0]
        simresult_i[j, 5] = covm_i[0][1]
        simresult_i[j, 6] = covm_i[1][1]
        # select action
        # bcmix model (objective_bcmix reads states_i / alpha_i / beta_i
        # from the notebook namespace)
        study_bcmix = optuna.create_study(direction="maximize")
        study_bcmix.optimize(objective_bcmix, n_trials=N_TRIALS)
        a_bcmix = study_bcmix.best_trial.params['a']
        simresult_bcmix_i[j, -3] = a_bcmix
        simresult_bcmix_i[j, -2] = study_bcmix.best_trial.value
        # old model (objective reads canonical_i / precision_i / alpha_i /
        # beta_i from the notebook namespace)
        study = optuna.create_study(direction="maximize")
        study.optimize(objective, n_trials=N_TRIALS)
        a = study.best_trial.params['a']
        simresult_i[j, 7] = a
        simresult_i[j, 8] = study.best_trial.value
        # update state
        # bcmix model
        if j in cp_j:
            # change point: redraw the true parameters until the implied
            # optimal action is inside ACTION_RANGE
            alpha_i, beta_i = np.random.multivariate_normal(mean_true.flatten(), covm_true)
            while not valid_param(alpha_i, beta_i):
                alpha_i, beta_i = np.random.multivariate_normal(mean_true.flatten(), covm_true)
        y_bcmix, alpha_i, beta_i = env_response(a_bcmix, alpha_i, beta_i, None, None, 0)
        simresult_bcmix_i[j, -1] = reward(a_bcmix, y_bcmix)
        states_i = update_with_change(states_i, a_bcmix, y_bcmix, P)
        # old model
        # NOTE(review): env_response is called with its default trailing
        # arguments here, unlike the bcmix call above -- confirm the defaults
        # cannot trigger a parameter change.
        y = env_response(a, alpha_i, beta_i)[0]
        # NOTE(review): this stores the raw response y, whereas the bcmix
        # array stores reward(a, y) in its last column -- confirm intended.
        simresult_i[j, 9] = y
        canonical_i, precision_i = update_without_change(canonical_i, precision_i, a, y)
    # The backslash is escaped explicitly ("\s" is an invalid escape sequence);
    # the resulting path string is unchanged, so existing files still match.
    np.save("simulations\\sim_cpbcmix_" + str(i) + ".npy", simresult_bcmix_i)
    np.save("simulations\\sim_cpold_" + str(i) + ".npy", simresult_i)


In [None]:
for i in range(200):
    # initialize
    # simresult_myopic_i columns: 0-1 = (alpha_i, beta_i); m*6+2 .. m*6+7 =
    # six summaries of state m (two means, three covariance entries, "pit");
    # -3 = chosen action, -2 = unused (stays 0.0), -1 = realized reward.
    simresult_myopic_i = np.full((DATA_LEN, 6 * (1 + M1 + M2) + 5), 0.0)
    alpha_i, beta_i = alpha, beta
    states_i = copy.deepcopy(states)
    # change point locations: either one change or two, drawn uniformly from
    # fixed windows (upper bounds of randint are exclusive)
    n_cp = np.random.randint(1, 3)
    if n_cp == 2:
        cp_j = [np.random.randint(1, 41), np.random.randint(41, 81)]
    else:
        cp_j = [np.random.randint(1, 81)]
    # simulate sequence
    for j in range(DATA_LEN):
        # current state
        # the true parameters do not depend on the state index, so record
        # them once per step instead of once per state
        simresult_myopic_i[j, 0] = alpha_i
        simresult_myopic_i[j, 1] = beta_i
        for m, s in states_i.items():
            # convert canonical form (can, pre) into mean / covariance
            covm_bcmix_i = np.linalg.inv(s["pre"])
            mean_bcmix_i = covm_bcmix_i @ s["can"]
            base = m * 6
            simresult_myopic_i[j, base + 2] = mean_bcmix_i[0][0]
            simresult_myopic_i[j, base + 3] = mean_bcmix_i[1][0]
            simresult_myopic_i[j, base + 4] = covm_bcmix_i[0][0]
            simresult_myopic_i[j, base + 5] = covm_bcmix_i[0][1]
            simresult_myopic_i[j, base + 6] = covm_bcmix_i[1][1]
            simresult_myopic_i[j, base + 7] = s["pit"]
        # select action
        a_myopic = myopic_mix(states_i, P)
        simresult_myopic_i[j, -3] = a_myopic
        # Column -2 keeps its initial 0.0: the q_myopic_with_change evaluation
        # of the chosen action is not computed in this loop.
        # update state
        if j in cp_j:
            # change point: redraw the true parameters until the implied
            # optimal action is inside ACTION_RANGE
            alpha_i, beta_i = np.random.multivariate_normal(mean_true.flatten(), covm_true)
            while not valid_param(alpha_i, beta_i):
                alpha_i, beta_i = np.random.multivariate_normal(mean_true.flatten(), covm_true)
        y_myopic, alpha_i, beta_i = env_response(a_myopic, alpha_i, beta_i, None, None, 0)
        simresult_myopic_i[j, -1] = reward(a_myopic, y_myopic)
        states_i = update_with_change(states_i, a_myopic, y_myopic, P)
    # The backslash is escaped explicitly ("\s" is an invalid escape sequence);
    # the resulting path string is unchanged.
    np.save("simulations\\sim_cpmyopic_" + str(i) + ".npy", simresult_myopic_i)


### Plot

In [None]:
# Number of saved simulation runs to average over; keep this equal to the
# number of runs written by the bcmix / old simulation cell.
n_runs = 1

# Per-step discounted regret, accumulated over runs and then averaged.
regrets_bcmix = np.full(DATA_LEN, 0.0)
regrets = np.full(DATA_LEN, 0.0)

for i in range(n_runs):
    # Escaped backslashes: same path string as before, without the invalid
    # "\s" escape sequence.
    simresult_bcmix_i = np.load("simulations\\sim_cpbcmix_" + str(i) + ".npy")
    simresult_i = np.load("simulations\\sim_cpold_" + str(i) + ".npy")
    for j in range(DATA_LEN):
        # Loop-local names on purpose: rebinding the notebook-level `alpha`
        # and `beta` here would change the starting parameters of any
        # simulation cell that is re-run afterwards.
        alpha_j, beta_j = simresult_bcmix_i[j, 0], simresult_bcmix_i[j, 1]
        discount = GAMMA ** j
        # column -3 of the bcmix array and column 7 of the old array hold the
        # action chosen at step j
        regrets_bcmix[j] += discount * (alpha_j + simresult_bcmix_i[j, -3] * beta_j) ** 2
        regrets[j] += discount * (alpha_j + simresult_i[j, 7] * beta_j) ** 2

regrets_bcmix /= n_runs
regrets /= n_runs

In [None]:
# plot regret: running sum of the per-step discounted regret of both models
fig, ax = plt.subplots()
ax.plot(np.cumsum(regrets_bcmix), color="red", label="bcmix model")
ax.plot(np.cumsum(regrets), color="blue", label="old model")
ax.set(
    xlabel="time step",
    ylabel="cumulative discounted regret",
    title="Cumulative regret: bcmix vs. old model",
)
ax.legend();