In [11]:
import numpy as np
import matplotlib.pyplot as plt
from ipywidgets import interact, FloatSlider, IntSlider
import pandas as pd
from IPython.display import display

def _simulate_linucb(lambda_, delta, n_rounds, d=10, num_actions=100):
    """Simulate a UCB algorithm on a synthetic stochastic linear bandit.

    The true parameter ``theta_star`` is drawn once from N(0, I_d). In every
    round a fresh decision set of ``num_actions`` unit-norm vectors is drawn,
    the action with the largest upper confidence bound is played, and a reward
    ``<action, theta_star> + N(0, 1)`` is observed.

    Randomness comes from the global ``np.random`` state, so call
    ``np.random.seed(...)`` beforehand for a reproducible run.

    Parameters
    ----------
    lambda_ : float
        Ridge regularization strength; must be > 0 so that the Gram matrix
        ``lambda_ * I`` is invertible in the first round.
    delta : float
        Confidence level used in the bonus multiplier; must be in (0, 1].
    n_rounds : int
        Number of rounds to play; must be >= 1.
    d : int, optional
        Dimension of the actions and of ``theta_star``.
    num_actions : int, optional
        Number of actions in each round's decision set.

    Returns
    -------
    dict
        ``theta_t``: estimate used in the final round (fit on the first
        ``n_rounds - 1`` observations; all zeros when ``n_rounds == 1``).
        ``theta_star``: the true parameter.
        ``total_reward`` / ``total_possible_reward``: sums over all rounds of
        the realized noisy reward and of the best expected reward available.
        ``regrets``: array of length ``n_rounds``; entry ``t - 1`` is the
        cumulative regret after round ``t`` (best expected reward minus the
        realized noisy reward, so it is not necessarily monotone).
        ``distances``: array of length ``n_rounds`` with
        ``||theta_t - theta_star||`` for the estimate used in each round.

    Raises
    ------
    ValueError
        If ``n_rounds < 1``, ``lambda_ <= 0`` or ``delta`` is outside (0, 1].
    """
    if n_rounds < 1:
        raise ValueError("n_rounds must be at least 1")
    if lambda_ <= 0:
        raise ValueError("lambda_ must be positive")
    if not 0 < delta <= 1:
        raise ValueError("delta must be in (0, 1]")

    # theta* from a multivariate Gaussian distribution (mean 0, covariance I)
    theta_star = np.random.multivariate_normal(np.zeros(d), np.eye(d))

    # Sufficient statistics of the regularized least-squares problem. They are
    # updated incrementally (rank-one update per round) instead of being
    # rebuilt from the whole history every round.
    V = lambda_ * np.eye(d)  # lambda * I + sum_s a_s a_s^T
    b = np.zeros(d)          # sum_s x_s a_s

    total_possible_reward = 0.0
    total_reward = 0.0
    regrets = np.empty(n_rounds)
    distances = np.empty(n_rounds)
    theta_t = np.zeros(d)

    for t in range(1, n_rounds + 1):
        # 1. decision set: Gaussian directions normalized onto the unit sphere
        actions = np.random.randn(num_actions, d)
        actions /= np.linalg.norm(actions, axis=1, keepdims=True)

        # 2. regularized least-squares estimate (all zeros in round 1, b == 0)
        inv_V = np.linalg.inv(V)
        theta_t = inv_V.dot(b)

        # 3. confidence-width multiplier for round t
        beta_t = np.sqrt(lambda_) + np.sqrt(
            2 * np.log(1 / delta) + d * np.log(1 + t / (lambda_ * d))
        )

        # 4. UCB for every action at once: <theta, a> + beta * sqrt(a^T V^-1 a)
        widths = np.sqrt(np.sum(actions.dot(inv_V) * actions, axis=1))
        ucb_values = actions.dot(theta_t) + beta_t * widths

        # 5. select action with highest UCB
        chosen = actions[np.argmax(ucb_values)]

        # 6. simulate the observed reward (add standard Gaussian noise)
        reward = chosen.dot(theta_star) + np.random.randn()

        # Best expected reward available in this round's decision set.
        total_possible_reward += np.max(actions.dot(theta_star))
        total_reward += reward

        # This difference of running totals is already the cumulative regret.
        regrets[t - 1] = total_possible_reward - total_reward
        distances[t - 1] = np.linalg.norm(theta_t - theta_star)

        # 7. update history
        V += np.outer(chosen, chosen)
        b += reward * chosen

    return {
        "theta_t": theta_t,
        "theta_star": theta_star,
        "total_reward": total_reward,
        "total_possible_reward": total_possible_reward,
        "regrets": regrets,
        "distances": distances,
    }


def run_algorithm(lambda_, delta, n_rounds):
    """Run the linear-bandit UCB simulation and report the results.

    Prints the estimated and true parameter vectors, the reward totals and the
    final cumulative regret, displays a per-coordinate comparison table, and
    draws two plots: cumulative regret divided by the round index, and the
    distance between the estimate and the true parameter over rounds.

    Parameters
    ----------
    lambda_ : float
        Ridge regularization strength (> 0).
    delta : float
        Confidence level in (0, 1].
    n_rounds : int
        Number of rounds to simulate (>= 1).

    Returns
    -------
    None
        Kept as None so that ``interact`` does not display a return value.

    Raises
    ------
    ValueError
        If any argument is outside the ranges above.
    """
    result = _simulate_linucb(lambda_, delta, n_rounds)
    theta_t = result["theta_t"]
    theta_star = result["theta_star"]
    regrets = result["regrets"]
    distances = result["distances"]

    # Outputs
    print("Estimated parameter vector:", theta_t)
    print("True parameter vector:", theta_star)
    print("Total Reward:", result["total_reward"])
    print("Total Possible Reward:", result["total_possible_reward"])
    print("Regret:", regrets[-1])

    # Create a DataFrame to display estimated and true parameters, and their absolute difference
    df = pd.DataFrame({
        'Estimated Parameter': theta_t,
        'True Parameter': theta_star,
        'Absolute Difference': np.abs(theta_t - theta_star)
    })

    display(df)

    # `regrets` already holds the cumulative regret after each round, so the
    # per-trial average is a plain division by t (no second cumulative sum).
    trials = range(1, n_rounds + 1)
    regret_per_trial = regrets / np.arange(1, n_rounds + 1)

    # Plot the graphs side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

    # Plot 1: Regret per trial (regret/t) vs. trials (t)
    ax1.plot(trials, regret_per_trial, label="Regret/Trial")
    ax1.set_xlabel("Trial (t)")
    ax1.set_ylabel("Regret/Trial (Regret/t)")
    ax1.set_title("Regret per Trial vs. Trials")
    ax1.legend()
    ax1.grid(True)

    # Plot 2: Distance to true theta over iterations
    ax2.plot(trials, distances, label="Distance to True Theta", color="blue")
    ax2.set_xlabel("Number of Iterations")
    ax2.set_ylabel("Distance to True Theta")
    ax2.set_title("Closeness to True Parameter (Theta)")
    ax2.legend()
    ax2.grid(True)

    plt.tight_layout()
    plt.show()

In [12]:
# Interactive driver: one slider per run_algorithm parameter.
# FloatSlider's default readout shows two decimals, which would render the
# small lambda_ and delta values as "0.00"; readout_format is widened so the
# displayed number matches the value actually passed to run_algorithm.
interact(
    run_algorithm,
    lambda_=FloatSlider(min=0.001, max=1.0, step=0.01, value=0.01, readout_format=".3f"),
    delta=FloatSlider(min=0.0001, max=0.1, step=0.0001, value=0.001, readout_format=".4f"),
    n_rounds=IntSlider(min=100, max=2000, step=100, value=1000)
)

interactive(children=(FloatSlider(value=0.01, description='lambda_', max=1.0, min=0.001, step=0.01), FloatSlid…

<function __main__.run_algorithm(lambda_, delta, n_rounds)>