### Hyperparameter search

In [None]:
import numpy as np
from banditpy.models import BanditTrainer2Arm
import pandas as pd


def adaptive_hyperparameter_search():
    """Adaptive search that learns from previous results"""

    # Start with educated guesses
    candidates = [
        {"lr": 1e-4, "beta_entropy": 0.1, "beta_value": 0.1, "hidden_size": 48},
        {"lr": 5e-4, "beta_entropy": 0.15, "beta_value": 0.1, "hidden_size": 48},
        {"lr": 1e-4, "beta_entropy": 0.2, "beta_value": 0.1, "hidden_size": 48},
    ]

    results = []

    for round_num in range(3):  # 3 rounds of refinement
        print(f"\n--- Round {round_num + 1} ---")

        round_results = []
        for i, params in enumerate(candidates):
            print(f"Testing: {params}")

            bt = BanditTrainer2Arm(**params, model_path=f"adaptive_{round_num}_{i}.pt")
            bt.train(mode="U", n_sessions=2500, n_trials=200)

            metrics = bt.comprehensive_evaluation()

            result = params.copy()
            result.update(metrics)
            result["round"] = round_num
            round_results.append(result)
            results.append(result)

        # Find best and generate new candidates around it
        round_df = pd.DataFrame(round_results)
        best = round_df.loc[round_df["composite_score"].idxmax()]

        # Generate new candidates around best
        candidates = generate_candidates_around_best(best)

    return pd.DataFrame(results)


def generate_candidates_around_best(best_params):
    """Generate new candidates around best parameters"""
    candidates = []

    # Variations around best
    variations = [
        {"lr_mult": 0.7, "entropy_mult": 0.8, "value_mult": 1.0},
        {"lr_mult": 1.0, "entropy_mult": 1.2, "value_mult": 0.8},
        {"lr_mult": 1.3, "entropy_mult": 1.0, "value_mult": 1.2},
        {"lr_mult": 0.8, "entropy_mult": 1.5, "value_mult": 1.0},
    ]

    for var in variations:
        candidate = {
            "lr": best_params["lr"] * var["lr_mult"],
            "beta_entropy": best_params["beta_entropy"] * var["entropy_mult"],
            "beta_value": best_params["beta_value"] * var["value_mult"],
            "hidden_size": int(best_params["hidden_size"]),
        }
        candidates.append(candidate)

    return candidates

### Beta search
Code was run on server. Based on the plot below beta_entropy=0.045 and beta_value = 0.025 were considered ideal.

In [66]:
import pandas as pd
import numpy as np
import mab_subjects
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
from neuropy import plotting

# df = pd.read_csv("D:/Data/beta_search_results.csv")
df = mab_subjects.GroupData().beta_search_results
x_grid = np.linspace(0.02, 0.1, 25)
y_grid = np.linspace(0.02, 0.1, 25)
x = df["beta_entropy"]
y = df["beta_value"]
z1 = df["final_perf_u"].to_numpy().reshape(25, 25).T
z2 = df["final_perf_s"].to_numpy().reshape(25, 25).T

z1 = gaussian_filter(z1, sigma=0.8)
z2 = gaussian_filter(z2, sigma=0.8)

fig = plotting.Fig(6, 4)

axs = [fig.subplot(fig.gs[_]) for _ in range(3)]

axs[0].pcolormesh(x_grid, y_grid, z1, shading="gourad", cmap="hot", vmin=0.55, vmax=0.7)
axs[0].set_xlabel("Beta Entropy")
axs[0].set_ylabel("Beta Value")
axs[0].set_title("Final Performance U")

axs[1].pcolormesh(x_grid, y_grid, z2, shading="gourad", cmap="hot", vmin=0.55, vmax=0.7)
axs[1].set_xlabel("Beta Entropy")
axs[1].set_ylabel("Beta Value")
axs[1].set_title("Final Performance S")

good_indx = (df["final_perf_u"] > 0.6) & (df["final_perf_s"] > 0.6)
colors = np.array(["k"] * len(x))
colors[good_indx] = "r"
axs[2].scatter(df["final_perf_s"], df["final_perf_u"], s=5, c=colors)
axs[2].axvline(x=0.6, color="gray", linestyle="--")
axs[2].axhline(y=0.6, color="gray", linestyle="--")
axs[2].set_xlabel("Final Performance S")
axs[2].set_ylabel("Final Performance U")

  axs[0].pcolormesh(x_grid, y_grid, z1, shading="gourad", cmap="hot", vmin=0.55, vmax=0.7)
  axs[1].pcolormesh(x_grid, y_grid, z2, shading="gourad", cmap="hot", vmin=0.55, vmax=0.7)


Text(0, 0.5, 'Final Performance U')

### Gamma search
Code was run on server. After fixing beta_entropy=0.045 and beta_value = 0.025, gamma was varried between 0.9 and 0.99.

In [71]:
import pandas as pd
import numpy as np
import mab_subjects
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter
from neuropy import plotting

df = pd.read_csv("D:/Data/gamma_search_results.csv")
# df = mab_subjects.GroupData().beta_search_results
# x_grid = np.linspace(0.02, 0.1, 25)
# y_grid = np.linspace(0.02, 0.1, 25)
x = df["final_perf_u"]
y = df["final_perf_s"]


fig = plotting.Fig(6, 4)

ax = fig.subplot(fig.gs[0])

ax.scatter(x, y, s=5)
ax.set_xlabel("final performance U")
ax.set_ylabel("final performance S")
ax.set_title("Final Performance")

Text(0.5, 1.0, 'Final Performance')

### Train Network

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from pathlib import Path

basepath = Path("D:/Data/mab/rnn_models/probs_1decimals")
prob_kwargs = dict(high=0.91, low=0.1, decimals=1)

n_sessions = 30000

for i in range(10):
    # ------ Structured network ----------
    b2a_s = BanditTrainer2Arm(model_path=basepath / f"structured_2arm_model{i}.pt")
    b2a_s.train(n_sessions=n_sessions, mode="Struc", return_df=False, **prob_kwargs)
    b2a_s.save_model()

    # ------ Untructured network ----------
    b2a_u = BanditTrainer2Arm(model_path=basepath / f"unstructured_2arm_model{i}.pt")
    b2a_u.train(n_sessions=n_sessions, mode="Unstruc", return_df=False, **prob_kwargs)
    b2a_u.save_model()

### Train Network with custom probabilities

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from banditpy.utils import generate_probs_2arm
from pathlib import Path

n_sessions = 30000

probs = np.array([0.2, 0.3, 0.4, 0.6, 0.7, 0.8])
unstruc_probs, struc_probs = generate_probs_2arm(
    probs, N=n_sessions, frac_impurity=0.16
)

basepath = Path("D:/Data/mab/rnn_models/Train1dec_0.16impure_345reset/")

for i in range(10):
    # ------ Structured network ----------
    b2a_s = BanditTrainer2Arm(
        model_path=basepath / f"structured_2arm_model{i}.pt", device="cpu"
    )
    b2a_s.train(n_sessions=n_sessions, mode=struc_probs, return_df=False)
    b2a_s.save_model()

    # ------ Untructured network ----------
    b2a_u = BanditTrainer2Arm(
        model_path=basepath / f"unstructured_2arm_model{i}.pt", device="cpu"
    )
    b2a_u.train(n_sessions=n_sessions, mode=unstruc_probs, return_df=False)
    b2a_u.save_model()