### Hyperparameter search

In [None]:
import numpy as np
from banditpy.models import BanditTrainer2Arm
import pandas as pd


def adaptive_hyperparameter_search(n_rounds=3, n_sessions=2500, n_trials=200):
    """Adaptive search that learns from previous results.

    Each round trains one ``BanditTrainer2Arm`` per candidate in mode ``"U"``,
    evaluates it with ``comprehensive_evaluation()``, and then replaces the
    candidate list with variations around the candidate that achieved the
    highest ``composite_score`` in that round.

    Parameters
    ----------
    n_rounds : int, default 3
        Number of refinement rounds to run.
    n_sessions : int, default 2500
        Value forwarded to ``train(n_sessions=...)`` for every candidate.
    n_trials : int, default 200
        Value forwarded to ``train(n_trials=...)`` for every candidate.

    Returns
    -------
    pandas.DataFrame
        One row per trained candidate: the hyperparameters, every entry
        returned by ``comprehensive_evaluation()`` and a zero-based ``round``
        column.

    Notes
    -----
    The evaluation metrics must contain a ``composite_score`` entry; it is the
    only value used to pick the round winner.
    """

    # Start with educated guesses
    candidates = [
        {"lr": 1e-4, "beta_entropy": 0.1, "beta_value": 0.1, "hidden_size": 48},
        {"lr": 5e-4, "beta_entropy": 0.15, "beta_value": 0.1, "hidden_size": 48},
        {"lr": 1e-4, "beta_entropy": 0.2, "beta_value": 0.1, "hidden_size": 48},
    ]

    results = []

    for round_num in range(n_rounds):
        print(f"\n--- Round {round_num + 1} ---")

        round_results = []
        for i, params in enumerate(candidates):
            print(f"Testing: {params}")

            # Round and candidate index are encoded in the model path so
            # every candidate gets its own file name.
            bt = BanditTrainer2Arm(**params, model_path=f"adaptive_{round_num}_{i}.pt")
            bt.train(mode="U", n_sessions=n_sessions, n_trials=n_trials)

            metrics = bt.comprehensive_evaluation()

            # Copy so the candidate dict itself is never mutated.
            result = params.copy()
            result.update(metrics)
            result["round"] = round_num
            round_results.append(result)
            results.append(result)

        # Candidates generated after the final round would never be trained,
        # so skip the refinement step there.
        if round_num == n_rounds - 1:
            break

        # Find this round's best and generate new candidates around it
        round_df = pd.DataFrame(round_results)
        best = round_df.loc[round_df["composite_score"].idxmax()]
        candidates = generate_candidates_around_best(best)

    return pd.DataFrame(results)


def generate_candidates_around_best(best_params):
    """Derive four new hyperparameter candidates from the best-known set.

    ``best_params`` is any mapping-like object (dict, pandas Series, ...) that
    can be indexed with ``"lr"``, ``"beta_entropy"``, ``"beta_value"`` and
    ``"hidden_size"``. Each returned dict scales the three continuous values
    by a fixed multiplier triple and carries ``hidden_size`` over unchanged,
    cast to ``int``. The input is not modified.
    """
    # (lr, beta_entropy, beta_value) multipliers -- one triple per candidate.
    scale_table = (
        (0.7, 0.8, 1.0),
        (1.0, 1.2, 0.8),
        (1.3, 1.0, 1.2),
        (0.8, 1.5, 1.0),
    )

    base_lr = best_params["lr"]
    base_entropy = best_params["beta_entropy"]
    base_value = best_params["beta_value"]
    width = int(best_params["hidden_size"])

    return [
        {
            "lr": base_lr * lr_scale,
            "beta_entropy": base_entropy * entropy_scale,
            "beta_value": base_value * value_scale,
            "hidden_size": width,
        }
        for lr_scale, entropy_scale, value_scale in scale_table
    ]

### Beta search

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mab_subjects
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from banditpy.utils import generate_probs_2arm
from pathlib import Path
from banditpy.core import Bandit2Arm

# Grid search over (beta_entropy, beta_value): for every pair, train one
# network on the "structured" and one on the "unstructured" probability set,
# evaluate both on held-out sessions and record their late-trial performance.

# Number of sessions used for training and for the evaluation run.
n_train_sessions = 30000
n_test_sessions = 200

# Candidate reward probabilities handed to generate_probs_2arm, which is
# unpacked here as an (unstructured, structured) pair.
# NOTE(review): the meaning of frac_impurity is defined in banditpy.utils -- confirm there.
probs = np.array([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
unstruc_probs_train, struc_probs_train = generate_probs_2arm(
    probs, N=n_train_sessions, frac_impurity=0.16
)
unstruc_probs_test, struc_probs_test = generate_probs_2arm(
    probs, N=n_test_sessions, frac_impurity=0.16
)

basepath = Path("D:/Data/mab/rnn_models/beta_search")
# 30 x 30 = 900 grid points; two networks are trained at each point.
beta_entropys = np.linspace(0.02, 0.1, 30)
beta_values = np.linspace(0.02, 0.1, 30)
# Learning rate and gamma are held fixed across the whole grid.
lr = 0.00004
gamma = 0.9

# Collects one single-row DataFrame per grid point; concatenated after the loop.
search_df = []
# i1 / i2 are the grid indices; they are not used inside the loop body.
for i1, be in enumerate(beta_entropys):
    for i2, bv in enumerate(beta_values):

        # ------ Structured network ----------
        # The same model_path is passed at every grid point; train() is called
        # with save_model=False, so presumably nothing is written there -- confirm.
        b2a_s = BanditTrainer2Arm(
            lr=lr,
            gamma=gamma,
            beta_entropy=be,
            beta_value=bv,
            device="cpu",
            model_path=basepath / f"beta_search_structured.pt",
        )
        # NOTE(review): progress_bar is True here but False for the
        # unstructured run below -- confirm whether that is intentional.
        b2a_s.train(
            n_sessions=n_train_sessions,
            mode=struc_probs_train,
            save_model=False,
            return_df=False,
            progress_bar=True,
        )
        dfs = b2a_s.evaluate(n_sessions=n_test_sessions, mode=struc_probs_test)
        # The string arguments are presumably column names of the evaluation
        # DataFrame -- verify against Bandit2Arm.from_df.
        task_s = Bandit2Arm.from_df(
            df=dfs,
            probs=["arm1_reward_prob", "arm2_reward_prob"],
            choices="chosen_action",
            rewards="reward",
            session_ids="session_id",
        )
        perf_s = task_s.get_optimal_choice_probability()
        # Summary score: mean of the last five entries of the performance curve.
        final_perf_s = perf_s[-5:].mean()

        # ------ Unstructured network ----------
        # Same hyperparameters as above; only the probability sets differ.
        b2a_u = BanditTrainer2Arm(
            lr=lr,
            gamma=gamma,
            beta_entropy=be,
            beta_value=bv,
            device="cpu",
            model_path=basepath / f"beta_search_unstructured.pt",
        )
        b2a_u.train(
            n_sessions=n_train_sessions,
            mode=unstruc_probs_train,
            save_model=False,
            return_df=False,
            progress_bar=False,
        )
        dfu = b2a_u.evaluate(n_sessions=n_test_sessions, mode=unstruc_probs_test)
        task_u = Bandit2Arm.from_df(
            df=dfu,
            probs=["arm1_reward_prob", "arm2_reward_prob"],
            choices="chosen_action",
            rewards="reward",
            session_ids="session_id",
        )
        perf_u = task_u.get_optimal_choice_probability()
        final_perf_u = perf_u[-5:].mean()

        # One result row per grid point; index=[0] is required because every
        # value in the dict is a scalar.
        df = pd.DataFrame(
            dict(
                beta_entropy=be,
                beta_value=bv,
                final_perf_s=final_perf_s,
                final_perf_u=final_perf_u,
            ),
            index=[0],
        )
        search_df.append(df)

# Rebinds search_df from the list of rows to the combined results table.
search_df = pd.concat(search_df, ignore_index=True)
# mab_subjects.GroupData().save(search_df, "beta_search_results")

In [None]:
def get_reward_probs(self, mode, N, low=0, high=1, decimals=1):
    """
    Generates reward probabilities for the two arms for a session.
    """
    if isinstance(mode, np.ndarray):
        if mode.shape == (2,):
            p_arm1 = np.ones(N) * mode[0]
            p_arm2 = np.ones(N) * mode[1]
        elif mode.shape == (N, 2):
            p_arm1, p_arm2 = mode[:, 0], mode[:, 1]

        self.train_type = "CustomProbabilities"

    elif isinstance(mode, str):
        match mode:
            case "Structured" | "Struc" | "S":
                p_arm1 = np.round(
                    np.random.uniform(low, high, size=N), decimals=decimals
                )
                p_arm2 = np.round(1.0 - p_arm1, decimals=decimals)

                self.train_type = "Structured"

            case "Unstructured" | "Unstruc" | "U":
                p_arm1 = np.round(
                    np.random.uniform(low, high, size=N), decimals=decimals
                )
                p_arm2 = np.round(
                    np.random.uniform(low, high, size=N), decimals=decimals
                )
                self.train_type = "Unstructured"

    elif isinstance(mode, list):
        assert len(mode) == 2, "Reward probabilities list must have exactly 2 elements."
        p_arm1 = mode[0] * np.ones(N)
        p_arm2 = mode[1] * np.ones(N)
        self.train_type = "CustomProbabilities"

    else:
        raise ValueError(
            "Invalid mode. Use 'Structured'/'Struc'/'S', 'Unstructured'/'Unstruc'/'U', or a list of probabilities of length 2, or a numpy array of shape (2,) or (N, 2)."
        )

    print(p_arm1)
    # Ensure probabilities are valid
    if np.all(p_arm1 <= 1) and np.all(p_arm2 <= 1):
        raise ValueError("Reward probabilities must be between 0 and 1.")

    return np.array([p_arm1, p_arm2]).T  # Index 0 for arm 1, index 1 for arm 2

In [None]:
import numpy as np


# Build the unstructured / structured probability sets from six candidate
# reward probabilities.
# NOTE: this cell imports only numpy, so generate_probs_2arm and
# n_train_sessions must already be defined by an earlier cell.
probs = np.array([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
unstruc_probs, struc_probs = generate_probs_2arm(
    probs, N=n_train_sessions, frac_impurity=0.16
)

### Train Network

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from pathlib import Path

# Train and save 10 structured and 10 unstructured networks using the
# trainer's built-in "Struc" / "Unstruc" modes.
basepath = Path("D:/Data/mab/rnn_models/probs_1decimals")
# Forwarded unchanged to train(); presumably these control how reward
# probabilities are sampled (range and rounding) -- confirm in BanditTrainer2Arm.
prob_kwargs = dict(high=0.91, low=0.1, decimals=1)

n_sessions = 30000

# The loop index only distinguishes the saved model files.
for i in range(10):
    # ------ Structured network ----------
    b2a_s = BanditTrainer2Arm(model_path=basepath / f"structured_2arm_model{i}.pt")
    b2a_s.train(n_sessions=n_sessions, mode="Struc", return_df=False, **prob_kwargs)
    b2a_s.save_model()

    # ------ Unstructured network ----------
    b2a_u = BanditTrainer2Arm(model_path=basepath / f"unstructured_2arm_model{i}.pt")
    b2a_u.train(n_sessions=n_sessions, mode="Unstruc", return_df=False, **prob_kwargs)
    b2a_u.save_model()

### Train Network with custom probabilities

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from banditpy.utils import generate_probs_2arm
from pathlib import Path

# Train and save 10 structured and 10 unstructured networks on explicitly
# generated probability sets instead of the trainer's built-in string modes.
n_sessions = 30000

# One probability set per training condition, generated once and reused for
# all 10 repetitions below.
# NOTE(review): the meaning of frac_impurity is defined in banditpy.utils -- confirm there.
probs = np.array([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
unstruc_probs, struc_probs = generate_probs_2arm(
    probs, N=n_sessions, frac_impurity=0.16
)

basepath = Path("D:/Data/mab/rnn_models/Train1dec_0.16impure_345reset/")

# The loop index only distinguishes the saved model files.
for i in range(10):
    # ------ Structured network ----------
    b2a_s = BanditTrainer2Arm(
        model_path=basepath / f"structured_2arm_model{i}.pt", device="cpu"
    )
    b2a_s.train(n_sessions=n_sessions, mode=struc_probs, return_df=False)
    b2a_s.save_model()

    # ------ Unstructured network ----------
    b2a_u = BanditTrainer2Arm(
        model_path=basepath / f"unstructured_2arm_model{i}.pt", device="cpu"
    )
    b2a_u.train(n_sessions=n_sessions, mode=unstruc_probs, return_df=False)
    b2a_u.save_model()