In [1]:
# Experiment notebook: Chapter 3 extracted code dry run
from shared_code.common_utils import ZooplusSimulator, run_simulation, plot_results
from shared_code.chapter3_utils import NeuralUCBAgent, FEATURE_DIM
from shared_code.chapter2_utils import LinUCBAgent  # Assuming LinUCBAgent is defined here

# --- Experiment Parameters ---
# Single source of truth for the run configuration. A stale, partial copy of
# these constants (with NUM_INTERACTIONS = 20_000) used to precede this block
# and was silently overridden by it; only the effective values are kept.
NUM_INTERACTIONS = 25_000  # Interactions simulated for each agent
N_PRODUCTS = 50            # Passed as n_products to the simulator and n_arms to LinUCB
ALPHA = 1.5                # Exploration-exploitation trade-off parameter
LAMBDA = 1.0               # Regularization parameter; passed as lambda_ to both agents
LEARNING_RATE = 0.01       # Learning rate for the NeuralUCB optimizer
SEED = 42                  # Seed passed to ZooplusSimulator, for reproducibility

# --- 3. Initialization ---
print("Initializing simulation environment and agents...")

# Environment used by the first run, built from the configured product count and seed.
simulator = ZooplusSimulator(seed=SEED, n_products=N_PRODUCTS)

# Read the feature dimension off the live simulator. This rebinds the
# FEATURE_DIM name imported from shared_code.chapter3_utils at the top.
FEATURE_DIM = simulator.feature_dim

# Hyperparameters that are handed identically to both agents.
shared_ucb_kwargs = dict(feature_dim=FEATURE_DIM, lambda_=LAMBDA, alpha=ALPHA)

# Chapter 2 baseline: LinUCB with one arm per product.
linucb_agent = LinUCBAgent(n_arms=N_PRODUCTS, **shared_ucb_kwargs)

# Chapter 3 agent: NeuralUCB with two hidden layers of 100 units each.
# Per the original author's note, lambda_ here regularizes the UCB matrix and
# is not a setting of the network optimizer.
# NOTE(review): SEED is only passed to the simulator in this cell; whether the
# network's weight initialization is seeded is not visible here - confirm.
neural_agent = NeuralUCBAgent(
    hidden_dims=[100, 100],
    lr=LEARNING_RATE,
    **shared_ucb_kwargs,
)

# --- 4. Execution ---
# Per-agent simulation outputs, keyed by a display label and later handed to
# plot_results. Filled one entry at a time so that a failure in the second run
# still leaves the first run's result available for inspection.
results = {}

# Run 1: LinUCB, on the simulator created during initialization.
results["LinUCB (Disjoint)"] = run_simulation(
    num_interactions=NUM_INTERACTIONS,
    simulator=simulator,
    agent=linucb_agent,
)

# Build a fresh simulator with the same seed before the second run; the intent
# is that both agents face the identical sequence of users.
simulator = ZooplusSimulator(seed=SEED, n_products=N_PRODUCTS)

# Run 2: NeuralUCB, on the freshly seeded simulator.
# NOTE(review): the output captured with this cell shows this call raising
# "RuntimeError: element 0 of tensors does not require grad and does not have
# a grad_fn". The cause is inside the agent / simulation code, which is not
# visible from this cell - investigate there.
results["NeuralUCB (Shared)"] = run_simulation(
    num_interactions=NUM_INTERACTIONS,
    simulator=simulator,
    agent=neural_agent,
)


# --- 5. Analysis & Visualization ---
# Window length handed to plot_results; named here rather than left as an
# inline magic number so it can be tuned in one place.
PLOT_WINDOW_SIZE = 500

print("\nPlotting results...")
plot_results(results, window_size=PLOT_WINDOW_SIZE)

Initializing simulation environment and agents...
Running simulation for LinUCBAgent...
Running simulation for LinUCBAgent...


  0%|          | 0/25000 [00:00<?, ?it/s]

Simulation complete. Average reward: 0.4798
Running simulation for NeuralUCBAgent...


  0%|          | 0/25000 [00:00<?, ?it/s]

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn