### Plotting alpha0 and beta0 across groups

In [None]:
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm

# Load the fitted parameters and derive two ratios from the alpha0/beta0
# columns before reshaping the table to long format.
file = Path("D:/Data/mab/thomp_params3.csv")
df = pd.read_csv(file)
df["alpha0/beta0"] = df["alpha0"] / df["beta0"]
df["alpha0/(alpha0+beta0)"] = df["alpha0"] / (df["alpha0"] + df["beta0"])

# Long format: one row per (grp, param) value, so the parameter name can go
# on the x-axis and the group can be encoded with `hue`.
df = pd.melt(
    df,
    id_vars=["grp"],
    value_vars=["alpha0", "beta0", "alpha0/beta0", "alpha0/(alpha0+beta0)"],
    value_name="param_values",
    var_name="param",
)

# Shared by the strip plot and the bar plot. Without an explicit order the
# strip plot orders groups by first appearance in the data, so the dodged
# (all-black) points could end up on top of the other group's bar.
hue_order = ["unstruc", "struc"]

fig = plotting.Fig(5, 5)

ax = fig.subplot(fig.gs[0])
sns.stripplot(
    data=df,
    x="param",
    y="param_values",
    hue="grp",
    hue_order=hue_order,
    dodge=True,
    alpha=0.5,
    size=3,
    # One colour per hue level (both black), matching the other cells.
    palette=["k", "k"],
)
SeabornPlotter(
    data=df, x="param", y="param_values", hue="grp", hue_order=hue_order
).barplot(palette=colors_2arm(1.2), errorbar="se").bootstrap_test(n_resamples=10000)
ax.get_legend().remove()

In [None]:
# Imported locally so this cell does not depend on another cell having been
# run first (numpy is not imported by any earlier cell).
import numpy as np

# Shape parameters of two Beta(1, 1) distributions, one per row.
alpha = np.ones(2)
beta = np.ones(2)

# 1000 draws from each distribution; the (2, 1) parameters broadcast against
# the requested (2, 1000) output shape.
a = np.random.beta(alpha[:, None], beta[:, None], size=(2, 1000))
# For every draw, index of the row holding the larger sample.
b = np.argmax(a, axis=0)
# Fraction of draws in which row 1 had the larger sample.
c = (b == 1).mean()
c

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from neuropy import plotting

# Shape parameters of the Beta distributions to visualise;
# alphas[k] is paired with betas[k].
alphas = [5, 1, 2]
betas = [25, 1, 18]

fig = plotting.Fig(5, 3)

# Keyword arguments shared by every histogram panel.
hist_style = dict(bins=100, density=True, alpha=0.5, color="blue")

# One panel per (alpha, beta) pair, laid out along the first grid row.
for col, shape_pair in enumerate(zip(alphas, betas)):
    shape_a, shape_b = shape_pair
    ax = fig.subplot(fig.gs[0, col])
    draws = np.random.beta(shape_a, shape_b, size=10000)
    ax.hist(draws, **hist_style)
    ax.set_xlim(0, 1)

### Thompson sampling with forgetting

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm
from statplotannot.plots import fix_legend

file = Path("D:/Data/mab/thomp_params_lr_tau2.csv")
df = pd.read_csv(file, sep=",")
# Unfiltered copy of the table; not used again in this cell.
df1 = df.copy()

# Keep only rows flagged as first experience. The explicit .copy() makes the
# result an independent frame, so the derived columns below are not assigned
# onto a slice of the original (avoids pandas' SettingWithCopyWarning).
df = df[df["first_experience"] == True].copy()
df["lr_mean"] = (df["lr1"] + df["lr2"]) / 2
df["lr_relative"] = np.abs(df["lr1"] - df["lr2"]) / (df["lr1"] + df["lr2"])


# Long format: every column that is not an id column becomes a
# (param, value) row.
df = pd.melt(
    df,
    id_vars=["grp", "sub_name", "first_experience"],
    var_name="param",
    value_name="value",
)
# "bic" is excluded from the plot.
df = df[df["param"] != "bic"]

# Shared by the bar plot and the strip plot. Without an explicit order the
# strip plot orders groups by first appearance in the data, so the dodged
# (all-black) points could end up on top of the other group's bar.
hue_order = ["unstruc", "struc"]

fig = plotting.Fig(5, 4, fontsize=10)

ax = fig.subplot(fig.gs[1])
SeabornPlotter(
    data=df, x="param", y="value", hue="grp", hue_order=hue_order
).barplot(palette=colors_2arm(1.2), errorbar="se").bootstrap_test(n_resamples=10000)

sns.stripplot(
    data=df,
    x="param",
    y="value",
    hue="grp",
    hue_order=hue_order,
    dodge=True,
    alpha=0.5,
    size=3,
    palette=["k", "k"],
)
ax.set_xlabel("")
ax.set_ylabel("Parameter values")
ax.set_title("Thompson Sampling fitting")
ax.tick_params(rotation=90)
ax.set_ylim(0, 9)
fix_legend(ax)

### Thompson sampling with forgetting (split learning rates)

In [13]:
import numpy as np
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm
from statplotannot.plots import fix_legend

file = Path("D:/Data/mab/thomp_params_lr_split1.csv")
df = pd.read_csv(file, sep=",")
# Unfiltered copy of the table; not used again in this cell.
df1 = df.copy()

# Keep only rows flagged as first experience. The explicit .copy() makes the
# result an independent frame, so the derived column below is not assigned
# onto a slice of the original (avoids pandas' SettingWithCopyWarning).
df = df[df["first_experience"] == True].copy()
df["alpha0_beta0_asymmetry"] = np.abs(df["alpha0"] - df["beta0"]) / (
    df["alpha0"] + df["beta0"]
)

# Ratios of summed learning rates. The numerators are parenthesised so the
# computed value matches the column label "(a + b) / (c + d)"; without the
# parentheses the expression evaluates as a + b / (c + d).
# NOTE(review): "c"/"u" presumably stand for chosen/unchosen arm and "+"/"-"
# for the *_pos/*_neg learning rates -- confirm against the fitting code.
dflr = pd.DataFrame()
dflr["(c+ + c-) / (u+ + u-)"] = (df["lr_c_pos"] + df["lr_c_neg"]) / (
    df["lr_u_pos"] + df["lr_u_neg"]
)
dflr["(c- + u-) / (c+ + u+)"] = (df["lr_c_neg"] + df["lr_u_neg"]) / (
    df["lr_c_pos"] + df["lr_u_pos"]
)
dflr["grp"] = df["grp"]

# Long format: every column that is not an id column becomes a
# (param, value) row.
df = pd.melt(
    df,
    id_vars=["grp", "sub_name", "first_experience"],
    var_name="param",
    value_name="value",
)

dflr = pd.melt(
    dflr,
    id_vars=["grp"],
    var_name="param",
    value_name="value",
)

# "bic", "alpha0" and "beta0" are excluded from the first panel
# (the alpha0/beta0 asymmetry column derived above is kept).
df = df[df["param"] != "bic"]
df = df[df["param"] != "alpha0"]
df = df[df["param"] != "beta0"]

# Shared by the bar plots and the strip plots. Without an explicit order the
# strip plots order groups by first appearance in the data, so the dodged
# (all-black) points could end up on top of the other group's bar.
hue_order = ["unstruc", "struc"]

fig = plotting.Fig(5, 8, fontsize=10)

# Panel 1: fitted parameters per group.
ax = fig.subplot(fig.gs[1:3])
SeabornPlotter(
    data=df, x="param", y="value", hue="grp", hue_order=hue_order
).barplot(palette=colors_2arm(1.2), errorbar="se").bootstrap_test(n_resamples=10000)

sns.stripplot(
    data=df,
    x="param",
    y="value",
    hue="grp",
    hue_order=hue_order,
    dodge=True,
    alpha=0.5,
    size=3,
    palette=["k", "k"],
)

ax.set_xlabel("")
ax.set_ylabel("Parameter values")
ax.set_title("Thompson Sampling fitting")
ax.tick_params(rotation=90)
# ax.set_ylim(0, 9)
# fix_legend(ax)
ax.get_legend().remove()

# Panel 2: learning-rate ratios per group.
ax = fig.subplot(fig.gs[5])
SeabornPlotter(
    data=dflr, x="param", y="value", hue="grp", hue_order=hue_order
).barplot(palette=colors_2arm(1.2), errorbar="se").bootstrap_test(n_resamples=10000)

sns.stripplot(
    data=dflr,
    x="param",
    y="value",
    hue="grp",
    hue_order=hue_order,
    dodge=True,
    alpha=0.5,
    size=3,
    palette=["k", "k"],
)

ax.set_xlabel("")
ax.set_ylabel("Parameter values")
ax.set_title("Thompson Sampling fitting")
ax.tick_params(rotation=90)
# ax.set_ylim(0, 9)
# fix_legend(ax)
ax.get_legend().remove()

In [23]:
import numpy as np
from banditpy.core import Bandit2Arm
from banditpy.plots import plot_trial_by_trial_2Arm
from neuropy import plotting

# Number of joint Beta draws used per trial to estimate the choice probability.
n_sim = 500
# probs = np.arange(0.1, 1, 0.1)
# Pool of reward probabilities that sessions are drawn from.
probs = [0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9]


def run_thomp2(
    alpha0,
    beta0,
    lr_c_pos,
    lr_u_neg,
    lr_c_neg,
    lr_u_pos,
    tau,
    *,
    n_sessions=100,
    n_trials=100,
):
    """Simulate a two-armed Thompson-sampling agent with decaying counts.

    Every session draws a fresh pair of reward probabilities from the
    module-level ``probs``: with probability 0.5 a "structured" pair
    ``(p, 1 - p)``, otherwise an "unstructured" pair of two distinct
    values. Per-arm success (``s``) and failure (``f``) counts start at
    zero in each session.

    On each trial the Beta parameters are ``alpha0 + s`` and ``beta0 + f``.
    ``n_sim`` (module-level) draws are taken per arm, the fraction of draws
    in which each arm holds the larger sample is used as its choice
    probability, and one choice is sampled from that. Both count vectors
    are then multiplied by ``tau`` before the outcome is added.

    Args:
        alpha0: Added to the success counts to form the Beta ``a`` parameter.
        beta0: Added to the failure counts to form the Beta ``b`` parameter.
        lr_c_pos: Increment to the chosen arm's success count on reward.
        lr_u_neg: Increment to the unchosen arm's failure count on reward.
        lr_c_neg: Increment to the chosen arm's failure count on no reward.
        lr_u_pos: Increment to the unchosen arm's success count on no reward.
        tau: Multiplicative decay applied to both count vectors every trial.
        n_sessions: Number of sessions to simulate.
        n_trials: Number of trials per session.

    Returns:
        Tuple ``(choices, rewards, reward_probs, session_ids, env_type)``.
        The first four are arrays with one entry (one row for
        ``reward_probs``) per simulated trial, sessions concatenated in
        order. ``env_type`` is the environment label of the LAST session
        only ("structured" or "unstructured"; "" if no session was run),
        so it does not describe the whole simulation.
    """
    choices = []
    rewards = []
    reward_probs = []
    session_ids = []
    env_type = ""

    for i in range(n_sessions):
        rand_env = np.random.rand()
        if rand_env <= 0.5:
            reward_probs_i = [p := np.random.choice(probs), 1 - p]
            env_type = "structured"
        else:
            reward_probs_i = np.random.choice(probs, size=2, replace=False)
            env_type = "unstructured"

        # Counts are reset at the start of every session.
        s = np.zeros(2)
        f = np.zeros(2)

        for _ in range(n_trials):
            alpha = alpha0 + s
            beta = beta0 + f

            # (2, n_sim) draws: row k holds samples for arm k.
            samples = np.random.beta(alpha[:, None], beta[:, None], size=(2, n_sim))
            selected = np.argmax(samples, axis=0)
            # Fraction of draws won by arm 0 and arm 1 respectively.
            choice_prob = np.array([1 - selected.mean(), selected.mean()])
            choice = np.random.choice([0, 1], p=choice_prob)
            random_num = np.random.rand()

            # Decay happens before the current outcome is added, so the
            # newest observation always enters at full weight.
            s = tau * s
            f = tau * f

            if random_num < reward_probs_i[choice]:
                s[choice] += lr_c_pos
                f[1 - choice] += lr_u_neg
                rewards.append(1)
            else:
                f[choice] += lr_c_neg
                s[1 - choice] += lr_u_pos
                rewards.append(0)

            choices.append(choice)
            session_ids.append(i)
            reward_probs.append(reward_probs_i)

    choices = np.array(choices)
    rewards = np.array(rewards)
    reward_probs = np.array(reward_probs)
    session_ids = np.array(session_ids)

    return choices, rewards, reward_probs, session_ids, env_type


fig = plotting.Fig(8, 4, fontsize=10)

# One row per simulated agent, in the positional order expected by run_thomp2:
# alpha0, beta0, lr_c_pos, lr_u_neg, lr_c_neg, lr_u_pos, tau
params = [
    [1, 1, 0.8, 0.6, 0.8, 0.4, 0.8],
    [2, 1, 0.5, 0.5, 0.4, 0.3, 0.8],
    [1, 2, 0.1, 0.8, 0.7, 0.2, 0.8],
    # [5, 5, 0.2, 0.3, 0.7],
]

# One grid column per parameter set: trial-by-trial plot on top,
# performance curve underneath.
for col, param_set in enumerate(params):
    alpha0, beta0, lr_c_pos, lr_u_neg, lr_c_neg, lr_u_pos, tau = param_set

    sim_out = run_thomp2(alpha0, beta0, lr_c_pos, lr_u_neg, lr_c_neg, lr_u_pos, tau)
    choices, rewards, reward_probs, session_ids, env_type = sim_out

    task = Bandit2Arm(
        probs=reward_probs,
        choices=choices,
        rewards=rewards,
        session_ids=session_ids,
    )
    perf = task.get_optimal_choice_probability()

    ax = fig.subplot(fig.gs[:3, col])
    plot_trial_by_trial_2Arm(task, ax=ax, sort_by_deltaprob=True)
    panel_title = f"env_type={env_type},\nalpha={alpha0}, beta={beta0},\nlr_c_pos={lr_c_pos}, lr_u_neg={lr_u_neg},\nlr_c_neg={lr_c_neg}, lr_u_pos={lr_u_pos},\ntau={tau}"
    ax.set_title(panel_title)

    ax2 = fig.subplot(fig.gs[3, col])
    trial_axis = np.arange(100)
    ax2.plot(trial_axis, perf, color="k")
    ax2.set_ylim(0.4, 1.0)
    ax2.set_xlabel("Trial")
    ax2.set_ylabel("Pr(High)")
    ax2.grid(axis="y")

### TinyRNN d vs nll

In [20]:
import numpy as np
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm
from statplotannot.plots import fix_legend

# Read the TinyRNN results table and keep an unfiltered copy in df1.
file = Path("D:/Data/mab/tinyRNN_results.csv")
df = pd.read_csv(file, sep=",")
df1 = df.copy()

# Restrict the plotted data to rows flagged as first experience.
first_exp_mask = df["first_experience"] == True
df = df[first_exp_mask]

fig = plotting.Fig(5, 4, fontsize=10)
ax = fig.subplot(fig.gs[1])

# Point plot of nll against d, one line per group.
plotter = SeabornPlotter(
    data=df,
    x="d",
    y="nll",
    hue="grp",
    hue_order=["unstruc", "struc"],
)
plotter.pointplot(palette=colors_2arm(1.2), errorbar="se", dodge=0.3)
fix_legend(ax)

ax.set_ylim(0.14, 0.25)

(0.14, 0.25)