### Hyperparameter search

In [None]:
import numpy as np
from banditpy.models import BanditTrainer2Arm
import pandas as pd


def adaptive_hyperparameter_search(
    n_rounds=3, n_sessions=2500, n_trials=200, initial_candidates=None
):
    """Run a round-based hyperparameter search that refines around each round's best.

    In every round one ``BanditTrainer2Arm`` is trained per candidate and then
    evaluated with ``comprehensive_evaluation()``. The candidate with the highest
    ``composite_score`` in that round seeds the candidate list for the next
    round via ``generate_candidates_around_best``.

    Parameters
    ----------
    n_rounds : int, default 3
        Number of refinement rounds to run.
    n_sessions : int, default 2500
        Forwarded as ``n_sessions`` to ``BanditTrainer2Arm.train``.
    n_trials : int, default 200
        Forwarded as ``n_trials`` to ``BanditTrainer2Arm.train``.
    initial_candidates : list of dict, optional
        Keyword-argument dicts for ``BanditTrainer2Arm`` used in the first
        round. When omitted, three built-in starting points are used.

    Returns
    -------
    pandas.DataFrame
        One row per trained candidate: its hyperparameters, every entry
        returned by ``comprehensive_evaluation()`` and the 0-based ``"round"``.

    Raises
    ------
    ValueError
        If ``initial_candidates`` is given but empty.
    """
    if initial_candidates is None:
        # Start with educated guesses
        candidates = [
            {"lr": 1e-4, "beta_entropy": 0.1, "beta_value": 0.1, "hidden_size": 48},
            {"lr": 5e-4, "beta_entropy": 0.15, "beta_value": 0.1, "hidden_size": 48},
            {"lr": 1e-4, "beta_entropy": 0.2, "beta_value": 0.1, "hidden_size": 48},
        ]
    else:
        candidates = list(initial_candidates)
        if not candidates:
            raise ValueError("initial_candidates must contain at least one candidate")

    results = []

    for round_num in range(n_rounds):
        print(f"\n--- Round {round_num + 1} ---")

        round_results = []
        for i, params in enumerate(candidates):
            print(f"Testing: {params}")

            bt = BanditTrainer2Arm(**params, model_path=f"adaptive_{round_num}_{i}.pt")
            bt.train(mode="U", n_sessions=n_sessions, n_trials=n_trials)

            metrics = bt.comprehensive_evaluation()

            # Copy so the candidate dict itself is never mutated by the metrics.
            result = params.copy()
            result.update(metrics)
            result["round"] = round_num
            round_results.append(result)
            results.append(result)

        # Candidates produced after the final round would never be trained,
        # so only refine while another round is still to come.
        if round_num < n_rounds - 1:
            round_df = pd.DataFrame(round_results)
            # The selected row is a Series; integer columns may come back as
            # floats, which generate_candidates_around_best casts back to int.
            best = round_df.loc[round_df["composite_score"].idxmax()]
            candidates = generate_candidates_around_best(best)

    return pd.DataFrame(results)


def generate_candidates_around_best(best_params):
    """Build four perturbed hyperparameter sets centred on ``best_params``.

    ``best_params`` must support item access for ``"lr"``, ``"beta_entropy"``,
    ``"beta_value"`` and ``"hidden_size"``. Each returned dict scales the first
    three by a fixed set of multipliers and carries ``hidden_size`` over
    unchanged, cast to ``int``.
    """
    # One entry per new candidate: multipliers applied to the best values.
    scale_table = (
        {"lr_mult": 0.7, "entropy_mult": 0.8, "value_mult": 1.0},
        {"lr_mult": 1.0, "entropy_mult": 1.2, "value_mult": 0.8},
        {"lr_mult": 1.3, "entropy_mult": 1.0, "value_mult": 1.2},
        {"lr_mult": 0.8, "entropy_mult": 1.5, "value_mult": 1.0},
    )

    def _scaled(scales):
        # hidden_size is not perturbed, only normalised to a plain int.
        return {
            "lr": best_params["lr"] * scales["lr_mult"],
            "beta_entropy": best_params["beta_entropy"] * scales["entropy_mult"],
            "beta_value": best_params["beta_value"] * scales["value_mult"],
            "hidden_size": int(best_params["hidden_size"]),
        }

    return [_scaled(scales) for scales in scale_table]

### Train Network

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from banditpy.models import BanditTrainer2Arm
from pathlib import Path

# Directory used to build each trainer's model_path (hard-coded, machine-specific).
basepath = Path("D:/Data/mab/rnn_models/")
# Passed as n_sessions to every train() call below.
n_sessions = 30000

# Train and save 10 structured/unstructured trainer pairs; the index i only
# distinguishes the model file names.
for i in range(10):
    # ------ Structured network ----------
    b2a_s = BanditTrainer2Arm(model_path=basepath / f"structured_2arm_model{i}.pt")
    b2a_s.train(n_sessions=n_sessions, mode="Struc", return_df=False)
    # presumably writes the model to the model_path given above; confirm in banditpy
    b2a_s.save_model()

    # ------ Unstructured network ----------
    b2a_u = BanditTrainer2Arm(model_path=basepath / f"unstructured_2arm_model{i}.pt")
    b2a_u.train(n_sessions=n_sessions, mode="Unstruc", return_df=False)
    b2a_u.save_model()