### Beta distribution samples

In [None]:
from scipy.stats import beta as beta_dist
import matplotlib.pyplot as plt
from neuropy import plotting
import numpy as np

# Evaluation grid for the Beta density and the starting Beta(1, 1) counts.
x = np.linspace(0, 1, 100)
alpha, beta = 1, 1
prob = 0.3  # chance that a single draw increments alpha rather than beta

fig = plotting.Fig(20, 5)
for i in range(20):
    # Density for the counts as they stand before this iteration's draw.
    pdf_values = beta_dist.pdf(x, alpha, beta)

    # Draw the panel first; the count update below only affects later panels.
    ax = fig.subplot(fig.gs[i])
    ax.fill_between(x, pdf_values, alpha=0.5, color="green")
    ax.set_ylim(0, 8)
    ax.axvline(prob, color="k", ls="--")  # dashed marker at `prob`

    # One uniform draw decides which count grows by one.
    rand_val = np.random.rand()
    if rand_val >= prob:
        beta += 1
    else:
        alpha += 1

### Fitting animal data

In [1]:
import numpy as np
from banditpy.models import Thompson2Arm
import mab_subjects
import pandas as pd
from joblib import Parallel, delayed

# Experiments to fit: `unstruc.allsess` followed by `struc.allsess`
# (presumably list-like, so `+` concatenates them — confirm).
exps = mab_subjects.unstruc.allsess + mab_subjects.struc.allsess


def get_thomp_param(exp):
    """Fit a ``Thompson2Arm`` model to one experiment and return its parameters.

    Parameters
    ----------
    exp
        Experiment object exposing ``b2a`` (the two-armed bandit task) and
        ``sub_name``.

    Returns
    -------
    pandas.DataFrame
        Single-row frame holding ``sub_name``, the fitted ``alpha0``,
        ``beta0``, ``lr_chosen``, ``lr_unchosen`` and ``tau``, the group label
        ``grp`` (``"struc"`` / ``"unstruc"``) and ``first_experience``
        (whether ``"Exp1"`` occurs in ``sub_name``).
    """
    task = exp.b2a
    grp = "struc" if task.is_structured else "unstruc"

    # The reset mask depends on the group: window starts for unstructured
    # experiments (after window ids have been assigned), session starts otherwise.
    if grp == "unstruc":
        task.auto_block_window_ids()
        reset_bool = task.is_window_start
    else:
        reset_bool = task.is_session_start

    # NOTE(review): reset_bool is read before filter_by_trials is applied; if
    # filtering drops trials the mask may no longer line up with the filtered
    # task — confirm against Thompson2Arm's expectations.
    task = task.filter_by_trials(100, 100)
    model = Thompson2Arm(task, reset_bool=reset_bool)
    model.fit(n_starts=5)

    df = pd.DataFrame(
        {
            "sub_name": exp.sub_name,
            "alpha0": model.alpha0,
            "beta0": model.beta0,
            "lr_chosen": model.lr_chosen,
            "lr_unchosen": model.lr_unchosen,
            "tau": model.tau,
            "grp": grp,
            "first_experience": "Exp1" in exp.sub_name,
        },
        index=[0],
    )
    print(
        f"Processed {exp.sub_name} with alpha0={model.alpha0}, beta0={model.beta0}, lr_chosen={model.lr_chosen}, lr_unchosen={model.lr_unchosen}"
    )
    return df


# Fit every experiment with up to 6 joblib workers; each call returns a
# one-row DataFrame of fitted parameters.
results = Parallel(n_jobs=6)(delayed(get_thomp_param)(exp) for exp in exps)
# Stack the per-experiment rows into one table and save it as CSV
# (relative path, so it lands in the current working directory).
params_df = pd.concat(results, ignore_index=True)
params_df.to_csv("thomp_params_reset1.csv", index=False)

In [None]:
# Run auto_block_window_ids() on the second-to-last experiment's task
# (the same call get_thomp_param makes before reading is_window_start).
exps[-2].b2a.auto_block_window_ids()

In [None]:
# Sum of is_window_start for the second-to-last experiment
# (presumably a boolean mask, making this the number of window starts — confirm).
exps[-2].b2a.is_window_start.sum()

In [None]:
# Block ids of the first experiment.
b = exps[0].b2a.block_ids

# Distinct block ids together with how many entries carry each id.
np.unique(b, return_counts=True)

In [None]:
# Display the name of the second-to-last experiment.
exps[-2].sub_name

In [None]:
# Timestamps of the first experiment cast to one-second resolution
# (presumably one timestamp per trial — confirm).
a = exps[0].b2a.datetime.astype("datetime64[s]")

# np.diff(a)
# Gap between consecutive timestamps in hours: seconds / 3600. Prepending a[0]
# makes the first gap 0 and keeps `gap` the same length as `a`.
gap = np.diff(a, prepend=a[0]).astype("timedelta64[s]").astype(int) / 3600

In [None]:
import matplotlib.pyplot as plt

_, ax = plt.subplots()

# Left y-axis: window ids of the second-to-last experiment.
ax.plot(exps[-2].b2a.window_ids)
# ax.plot(exps[0].b2a.session_ids)
# Right y-axis (shared x): block ids of the same experiment on their own scale.
ax2 = ax.twinx()
ax2.plot(exps[-2].b2a.block_ids)

# plt.plot(exps[0].b2a.is_block_start / 2)
# plt.plot(exps[0].b2a.is_session_start / 3)

In [None]:
# Display the window, session and block ids of the first experiment side by side.
exps[0].b2a.window_ids, exps[0].b2a.session_ids, exps[0].b2a.block_ids

### V1: Simulating Thompson sampling with forgetting

In [None]:
import numpy as np
from banditpy.core import Bandit2Arm
from banditpy.plots import plot_trial_by_trial_2Arm
from neuropy import plotting

# Number of posterior samples drawn per arm to estimate the choice probability.
n_sim = 500
# probs = np.arange(0.1, 1, 0.1)
# Candidate reward probabilities for the first arm of a session.
probs = [0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]


def run_thomp(delta_s, delta_f, tau, n_sessions=100, n_trials=100):
    """Simulate a two-armed Thompson-sampling agent with forgetting.

    Every session draws ``p`` from the module-level ``probs`` and uses reward
    probabilities ``[p, 1 - p]``. Both arms start each session at Beta(1, 1).
    On every trial the probability of choosing each arm is estimated from
    ``n_sim`` posterior samples per arm, a choice is drawn from it, the
    Beta parameters are decayed towards 1 by ``tau``, and then the chosen arm
    is updated with the outcome.

    Parameters
    ----------
    delta_s : float
        Increment added to the chosen arm's ``alpha`` after a reward.
    delta_f : float
        Increment added to the chosen arm's ``beta`` after no reward.
    tau : float
        Retention factor applied each trial to ``alpha - 1`` and ``beta - 1``.
    n_sessions : int, optional
        Number of simulated sessions (default 100).
    n_trials : int, optional
        Number of trials per session (default 100).

    Returns
    -------
    choices, rewards, reward_probs, session_ids : numpy.ndarray
        One entry per simulated trial; ``reward_probs`` has one row of two
        probabilities per trial.
    """
    choices = []
    rewards = []
    reward_probs = []
    session_ids = []

    for session in range(n_sessions):
        # reward_probs_i = np.random.choice(probs, size=2, replace=False)
        p = np.random.choice(probs)
        reward_probs_i = [p, 1 - p]
        alpha = np.ones(2)
        beta = np.ones(2)

        for _ in range(n_trials):
            # Fraction of sample pairs in which arm 1 beats arm 0 estimates
            # the probability of choosing arm 1.
            samples = np.random.beta(alpha[:, None], beta[:, None], size=(2, n_sim))
            selected = np.argmax(samples, axis=0)
            choice_prob = np.array([1 - selected.mean(), selected.mean()])
            choice = np.random.choice([0, 1], p=choice_prob)
            random_num = np.random.rand()

            # Forgetting: shrink the accumulated evidence of both arms before
            # adding the new outcome.
            alpha = 1.0 + (alpha - 1.0) * tau
            beta = 1.0 + (beta - 1.0) * tau

            if random_num < reward_probs_i[choice]:
                alpha[choice] += delta_s
                rewards.append(1)
            else:
                beta[choice] += delta_f
                rewards.append(0)

            choices.append(choice)
            session_ids.append(session)
            reward_probs.append(reward_probs_i)

    choices = np.array(choices)
    rewards = np.array(rewards)
    reward_probs = np.array(reward_probs)
    session_ids = np.array(session_ids)

    return choices, rewards, reward_probs, session_ids


fig = plotting.Fig(8, 4, fontsize=10)

# One figure column per (delta_s, delta_f, tau) setting.
params = [[7, 3, 0.5], [5, 5, 0.7], [6, 4, 0.8], [5, 8, 0.9]]
for i, (delta_s, delta_f, tau) in enumerate(params):
    # Simulate the agent and wrap the trial arrays in a task object.
    choices, rewards, reward_probs, session_ids = run_thomp(delta_s, delta_f, tau)
    task = Bandit2Arm(
        probs=reward_probs,
        choices=choices,
        rewards=rewards,
        session_ids=session_ids,
    )
    perf = task.get_optimal_choice_probability()

    # Top three grid rows: trial-by-trial view.
    ax = fig.subplot(fig.gs[:3, i])
    plot_trial_by_trial_2Arm(task, ax=ax, sort_by_deltaprob=True)
    ax.set_title(f"deltaS={delta_s}, deltaF={delta_f}, tau={tau}")

    # Bottom grid row: optimal-choice probability across the 100 trials.
    ax2 = fig.subplot(fig.gs[3, i])
    ax2.plot(np.arange(100), perf, color="k")
    ax2.set(ylim=(0.4, 1.0), xlabel="Trial", ylabel="Pr(High)")
    ax2.grid(axis="y")

### V2: Simulating Thompson sampling with forgetting

In [None]:
import numpy as np
from banditpy.core import Bandit2Arm
from banditpy.plots import plot_trial_by_trial_2Arm
from neuropy import plotting

# Number of posterior samples drawn per arm to estimate the choice probability.
n_sim = 500
# probs = np.arange(0.1, 1, 0.1)
# Pool from which each session's two reward probabilities are drawn.
probs = [0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]


def run_thomp2(tau, kappa1, kappa2, n_sessions=100, n_trials=100):
    """Simulate Thompson sampling with decaying success/failure counts.

    Every session draws two distinct reward probabilities from the
    module-level ``probs``. Each arm keeps a success count ``s`` and a failure
    count ``f``; on every trial both are multiplied by ``tau``, the chosen
    arm's count for the observed outcome grows by that arm's learning rate,
    and the posterior becomes Beta(1 + s, 1 + f).

    Parameters
    ----------
    tau : float
        Retention factor applied to ``s`` and ``f`` on every trial.
    kappa1 : float
        Learning rate used when arm 0 is chosen.
    kappa2 : float
        Learning rate used when arm 1 is chosen.
    n_sessions : int, optional
        Number of simulated sessions (default 100).
    n_trials : int, optional
        Number of trials per session (default 100).

    Returns
    -------
    choices, rewards, reward_probs, session_ids : numpy.ndarray
        One entry per simulated trial; ``reward_probs`` has one row of two
        probabilities per trial.
    """
    choices = []
    rewards = []
    reward_probs = []
    session_ids = []
    # Per-arm learning rates come from the arguments, not from globals that
    # happen to exist in the calling scope.
    lr = [kappa1, kappa2]

    for session in range(n_sessions):
        reward_probs_i = np.random.choice(probs, size=2, replace=False)
        # reward_probs_i = [p := np.random.choice(probs), 1 - p]
        alpha = np.ones(2)
        beta = np.ones(2)
        s = np.zeros(2)
        f = np.zeros(2)

        for _ in range(n_trials):
            # Fraction of sample pairs in which arm 1 beats arm 0 estimates
            # the probability of choosing arm 1.
            samples = np.random.beta(alpha[:, None], beta[:, None], size=(2, n_sim))
            selected = np.argmax(samples, axis=0)
            choice_prob = np.array([1 - selected.mean(), selected.mean()])
            choice = np.random.choice([0, 1], p=choice_prob)
            random_num = np.random.rand()

            # Forgetting: decay both counts before adding the new outcome.
            s = tau * s
            f = tau * f

            if random_num < reward_probs_i[choice]:
                s[choice] += 1 * lr[choice]
                rewards.append(1)
            else:
                f[choice] += 1 * lr[choice]
                rewards.append(0)

            alpha = 1.0 + s
            beta = 1.0 + f

            choices.append(choice)
            session_ids.append(session)
            reward_probs.append(reward_probs_i)

    choices = np.array(choices)
    rewards = np.array(rewards)
    reward_probs = np.array(reward_probs)
    session_ids = np.array(session_ids)

    return choices, rewards, reward_probs, session_ids


fig = plotting.Fig(8, 4, fontsize=10)

# One figure column per (tau, lr1, lr2) setting.
params = [[0.9, 0.9, 0.9], [0.42, 0.59, 0.47], [0.5, 0.5, 0.5], [0.1, 0.1, 0.5]]
for i, (tau, lr1, lr2) in enumerate(params):
    # Simulate the agent and wrap the trial arrays in a task object.
    choices, rewards, reward_probs, session_ids = run_thomp2(tau, lr1, lr2)
    task = Bandit2Arm(
        probs=reward_probs,
        choices=choices,
        rewards=rewards,
        session_ids=session_ids,
    )
    perf = task.get_optimal_choice_probability()

    # Top three grid rows: trial-by-trial view.
    ax = fig.subplot(fig.gs[:3, i])
    plot_trial_by_trial_2Arm(task, ax=ax, sort_by_deltaprob=True)
    ax.set_title(f"tau={tau}, lr1={lr1}, lr2={lr2}")

    # Bottom grid row: optimal-choice probability across the 100 trials.
    ax2 = fig.subplot(fig.gs[3, i])
    ax2.plot(np.arange(100), perf, color="k")
    ax2.set(ylim=(0.4, 1.0), xlabel="Trial", ylabel="Pr(High)")
    ax2.grid(axis="y")

### V3: Simulating Thompson sampling with priors, chosen/unchosen learning rates, and forgetting

In [None]:
import numpy as np
from banditpy.core import Bandit2Arm
from banditpy.plots import plot_trial_by_trial_2Arm
from neuropy import plotting

# Number of posterior samples drawn per arm to estimate the choice probability.
n_sim = 500
# probs = np.arange(0.1, 1, 0.1)
# Pool from which each session's reward probabilities are drawn.
probs = [0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]


def run_thomp2(
    alpha0, beta0, lr_chosen, lr_unchosen, tau, n_sessions=100, n_trials=100
):
    """Simulate Thompson sampling with priors, counterfactual updates and decay.

    Each session is, with equal chance, "structured" (reward probabilities
    ``[p, 1 - p]`` with ``p`` drawn from the module-level ``probs``) or
    "unstructured" (two distinct values drawn from ``probs``). Each arm keeps
    a success count ``s`` and a failure count ``f`` that start at zero every
    session; the posterior used on a trial is Beta(alpha0 + s, beta0 + f).
    After the choice both counts are multiplied by ``tau`` and then updated:

    * reward: chosen arm ``s += lr_chosen``, other arm ``f += lr_unchosen``
    * no reward: chosen arm ``f += lr_chosen``, other arm ``s += lr_unchosen``

    Parameters
    ----------
    alpha0, beta0 : float
        Prior Beta parameters shared by both arms.
    lr_chosen : float
        Increment applied to the chosen arm's count for the observed outcome.
    lr_unchosen : float
        Increment applied to the unchosen arm's count for the opposite outcome.
    tau : float
        Retention factor applied to ``s`` and ``f`` on every trial.
    n_sessions : int, optional
        Number of simulated sessions (default 100).
    n_trials : int, optional
        Number of trials per session (default 100).

    Returns
    -------
    choices, rewards, reward_probs, session_ids : numpy.ndarray
        One entry per simulated trial; ``reward_probs`` has one row of two
        probabilities per trial.
    env_type : str
        Environment label of the LAST simulated session only (``""`` when no
        session was simulated). Sessions are mixed, so this does not describe
        the whole simulation.
    """
    choices = []
    rewards = []
    reward_probs = []
    session_ids = []
    env_type = ""

    for session in range(n_sessions):
        rand_env = np.random.rand()
        if rand_env <= 0.5:
            p = np.random.choice(probs)
            reward_probs_i = [p, 1 - p]
            env_type = "structured"
        else:
            reward_probs_i = np.random.choice(probs, size=2, replace=False)
            env_type = "unstructured"

        s = np.zeros(2)
        f = np.zeros(2)

        for _ in range(n_trials):
            # Posterior for this trial: prior plus the (decayed) counts.
            alpha = alpha0 + s
            beta = beta0 + f

            # Fraction of sample pairs in which arm 1 beats arm 0 estimates
            # the probability of choosing arm 1.
            samples = np.random.beta(alpha[:, None], beta[:, None], size=(2, n_sim))
            selected = np.argmax(samples, axis=0)
            choice_prob = np.array([1 - selected.mean(), selected.mean()])
            choice = np.random.choice([0, 1], p=choice_prob)
            random_num = np.random.rand()

            # Forgetting: decay both counts before adding the new outcome.
            s = tau * s
            f = tau * f

            if random_num < reward_probs_i[choice]:
                s[choice] += lr_chosen
                f[1 - choice] += lr_unchosen
                rewards.append(1)
            else:
                f[choice] += lr_chosen
                s[1 - choice] += lr_unchosen
                rewards.append(0)

            choices.append(choice)
            session_ids.append(session)
            reward_probs.append(reward_probs_i)

    choices = np.array(choices)
    rewards = np.array(rewards)
    reward_probs = np.array(reward_probs)
    session_ids = np.array(session_ids)

    return choices, rewards, reward_probs, session_ids, env_type


fig = plotting.Fig(8, 4, fontsize=10)

# One figure column per (alpha0, beta0, lr_chosen, lr_unchosen, tau) setting.
params = [
    [1, 1, 0.8, 0.6, 0.8],
    [2, 1, 0.5, 0.5, 0.9],
    [1, 6, 0.1, 0.8, 0.9],
    [5, 5, 0.2, 0.3, 0.7],
]
for i, (alpha0, beta0, lr_chosen, lr_unchosen, tau) in enumerate(params):
    # Simulate the agent and wrap the trial arrays in a task object.
    choices, rewards, reward_probs, session_ids, env_type = run_thomp2(
        alpha0, beta0, lr_chosen, lr_unchosen, tau
    )
    task = Bandit2Arm(
        probs=reward_probs,
        choices=choices,
        rewards=rewards,
        session_ids=session_ids,
    )
    perf = task.get_optimal_choice_probability()

    # Top three grid rows: trial-by-trial view, titled with the settings used.
    ax = fig.subplot(fig.gs[:3, i])
    plot_trial_by_trial_2Arm(task, ax=ax, sort_by_deltaprob=True)
    ax.set_title(
        f"env_type={env_type},\nalpha={alpha0}, beta={beta0},\nlr_chosen={lr_chosen}, lr_unchosen={lr_unchosen},\ntau={tau}"
    )

    # Bottom grid row: optimal-choice probability across the 100 trials.
    ax2 = fig.subplot(fig.gs[3, i])
    ax2.plot(np.arange(100), perf, color="k")
    ax2.set(ylim=(0.4, 1.0), xlabel="Trial", ylabel="Pr(High)")
    ax2.grid(axis="y")

### Smoothness around parameter

In [None]:
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm
import mab_subjects
from banditpy.models import Thompson2Arm

# Previously fitted parameters, one row per experiment (matched on `sub_name` below).
file = Path("D:/Data/mab/thomp_params_lr_tau.csv")
df = pd.read_csv(file, sep=",")
# df = df[df["first_experience"] == True]

exps = mab_subjects.unstruc.allsess + mab_subjects.struc.allsess

# For every experiment: rebuild the model on the filtered task, load that
# experiment's stored parameters and run the model's smoothness inspection.
for i, exp in enumerate(exps):

    task = exp.b2a.filter_by_trials(100, 100)
    model = Thompson2Arm(task)
    # Rows of the CSV belonging to this experiment; `.values[0]` below takes the
    # first match and raises IndexError when there is none.
    params = df[df["sub_name"] == exp.sub_name]
    # NOTE(review): this expects `lr1`/`lr2`/`tau` columns and a matching
    # set_params signature, while the fitting cell in this notebook stores
    # `lr_chosen`/`lr_unchosen` — confirm the CSV and model version agree.
    model.set_params(
        lr1=params["lr1"].values[0],
        lr2=params["lr2"].values[0],
        tau=params["tau"].values[0],
    )
    model.inspect_smoothness()

In [None]:
import numpy as np

# Scratch check: element-wise maximum of two scalars (evaluates to 4).
np.maximum(3, 4)