### Fit model

In [None]:
import numpy as np
import pandas as pd
from banditpy.models import DecisionModel
from banditpy.models.policy import ThompsonSplit2Arm
from banditpy.models.optim import OptunaOptimizer
import mab_subjects

# NOTE(review): the same session collection is added to itself, so every
# session would appear twice once the slice below is widened; presumably the
# second operand was meant to be a different group -- confirm.
exps = mab_subjects.mostly_unstruc.allsess + mab_subjects.mostly_unstruc.allsess

# One long-format frame (one row per fitted parameter) is collected per session
# and concatenated into `params_df` after the loop.
param_frames = []

# Only the first session is fitted here; widen the slice to fit more.
for exp in exps[:1]:
    task = exp.b2a.filter_by_trials(min_trials=100, clip_max=100)
    print(exp.sub_name)

    policy = ThompsonSplit2Arm()
    policy.describe()  # What parameters are available?
    # --- Use this to set custom bounds ---
    # policy.set_bounds(
    #     alpha_0=(-1, 1),
    #     beta_0=(-1, 1),
    #     tau=(0, 1),
    #     lr_c_pos=(0.0, 1.0),
    #     lr_c_neg=(0.0, 1.0),
    #     lr_u_pos=(0.0, 1.0),
    #     lr_u_neg=(0.0, 1.0),
    # )

    model = DecisionModel(task, policy=policy)
    model.fit(
        optimizer=OptunaOptimizer(n_trials=100),
        n_jobs=6,
        n_starts=5,
        progress=True,
    )
    model.describe()

    # `model.params` is read as a mapping of parameter name -> fitted value;
    # the scalar columns (name, grp) are broadcast across all parameter rows.
    session_df = pd.DataFrame(
        {
            "name": exp.sub_name,
            "param": list(model.params.keys()),
            "param_values": list(model.params.values()),
            "grp": exp.group_tag,
        }
    )
    param_frames.append(session_df)

params_df = pd.concat(param_frames, ignore_index=True)
# mab_subjects.GroupData().save(params_df, "qlearn_2alphaH")

### Beta distribution samples

In [None]:
from scipy.stats import beta as beta_dist
import matplotlib.pyplot as plt
from neuropy import plotting
import numpy as np

# Evaluation grid on [0, 1] for the Beta density.
x = np.linspace(0, 1, 100)

# Start from Beta(1, 1) and update the two shape parameters one draw at a time.
alpha, beta = 1, 1
prob = 0.3  # probability that a single draw increments `alpha`

fig = plotting.Fig(20, 5)
for i in range(20):
    # Density for the current (alpha, beta), i.e. before this step's update.
    pdf_values = beta_dist.pdf(x, alpha, beta)

    ax = fig.subplot(fig.gs[i])
    ax.fill_between(x, pdf_values, alpha=0.5, color="green")
    ax.set_ylim(0, 8)
    ax.axvline(prob, color="k", ls="--")  # mark the generating probability

    # A uniform draw below `prob` increments alpha, otherwise beta.
    rand_val = np.random.rand()
    success = rand_val < prob
    alpha += int(success)
    beta += int(not success)

In [None]:
import optuna


def objective(trial):
    """Toy objective: sample ``x`` from [-100, 100] and return its square.

    Parameters
    ----------
    trial
        Object providing ``suggest_float(name, low, high)`` (an Optuna trial
        when run through ``study.optimize``).

    Returns
    -------
    The squared value of the suggested ``x``.
    """
    lower, upper = -100, 100
    sample = trial.suggest_float("x", lower, upper)
    return sample**2


if __name__ == "__main__":
    # Run the toy study with a default-configured Optuna study.
    study = optuna.create_study()
    # Stop at whichever limit is hit first: 1000 evaluations or 3 seconds.
    study.optimize(objective, n_trials=1000, timeout=3)
    best_params, best_value = study.best_params, study.best_value
    print(f"Best params is {best_params} with value {best_value}")

### Fitting animal data

In [None]:
import numpy as np
from banditpy.models import Thompson2Arm
import mab_subjects
import pandas as pd
from joblib import Parallel, delayed

# Sessions to fit: the `unstruc` session collection followed by the `struc`
# one, combined with `+` into a single sequence.
exps = mab_subjects.unstruc.allsess + mab_subjects.struc.allsess


def get_thomp_param(exp):
    """Fit a ``Thompson2Arm`` model to one session and tabulate the result.

    Parameters
    ----------
    exp
        Session object exposing ``sub_name`` and a ``b2a`` task.

    Returns
    -------
    pandas.DataFrame
        Single-row frame holding the fitted ``alpha0``, ``beta0``,
        ``lr_chosen``, ``lr_unchosen`` and ``tau`` together with the session
        name, its group label and a ``first_experience`` flag.
    """
    task = exp.b2a
    grp = "struc" if task.is_structured else "unstruc"

    # Unstructured sessions use window starts as reset points (after
    # assigning block windows); structured sessions use session starts.
    # NOTE(review): the reset mask is read from the task *before*
    # `filter_by_trials` is applied below -- confirm it still lines up with
    # the filtered trials that the model is fitted on.
    if grp == "unstruc":
        task.auto_block_window_ids()
        reset_bool = task.is_window_start
    else:
        reset_bool = task.is_session_start

    task = task.filter_by_trials(100, 100)
    model = Thompson2Arm(task, reset_bool=reset_bool)
    model.fit(n_starts=5)

    df = pd.DataFrame(
        {
            "sub_name": exp.sub_name,
            "alpha0": model.alpha0,
            "beta0": model.beta0,
            "lr_chosen": model.lr_chosen,
            "lr_unchosen": model.lr_unchosen,
            "tau": model.tau,
            "grp": grp,
            "first_experience": "Exp1" in exp.sub_name,
        },
        index=[0],  # all values are scalars, so an explicit index is required
    )
    print(
        f"Processed {exp.sub_name} with alpha0={model.alpha0}, beta0={model.beta0}, lr_chosen={model.lr_chosen}, lr_unchosen={model.lr_unchosen}"
    )
    return df


# Fit every session with up to 6 joblib workers; each call yields a one-row frame.
fit_jobs = (delayed(get_thomp_param)(exp) for exp in exps)
results = Parallel(n_jobs=6)(fit_jobs)

# Stack the per-session rows and write them to CSV without the index column.
params_df = pd.concat(results, ignore_index=True)
params_df.to_csv("thomp_params_reset1.csv", index=False)

### Smoothness around parameter

In [None]:
import pandas as pd
from pathlib import Path
from neuropy import plotting
import seaborn as sns
from statplotannot.plots import SeabornPlotter
from mab_colors import colors_2arm
import mab_subjects
from banditpy.models import Thompson2Arm

# Previously fitted parameters, looked up per session via the `sub_name` column.
file = Path("D:/Data/mab/thomp_params_lr_tau.csv")
df = pd.read_csv(file, sep=",")
# df = df[df["first_experience"] == True]

exps = mab_subjects.unstruc.allsess + mab_subjects.struc.allsess

# Columns of the CSV that are forwarded to `model.set_params`.
# NOTE(review): these are lr1/lr2/tau, whereas the fitting cell above reads
# alpha0/beta0/lr_chosen/lr_unchosen/tau from Thompson2Arm -- confirm that
# this CSV and `set_params` still agree with the current model.
param_names = ("lr1", "lr2", "tau")

for exp in exps:
    task = exp.b2a.filter_by_trials(100, 100)
    model = Thompson2Arm(task)

    params = df[df["sub_name"] == exp.sub_name]
    if params.empty:
        # Same exception type as the bare `.values[0]` lookup would raise,
        # but naming the session and file that caused it.
        raise IndexError(f"No fitted parameters for {exp.sub_name!r} in {file}")

    # Use the first matching row if a session appears more than once.
    model.set_params(**{name: params[name].values[0] for name in param_names})
    model.inspect_smoothness()

In [None]:
import numpy as np

# Scratch check: element-wise maximum of two scalars (evaluates to 4).
np.maximum(3, 4)