In [12]:
import numpy as np

class MAB:
    """Multi-armed bandit that minimises loss with an ε-greedy policy.

    With probability ``epsilon`` a uniformly random arm is pulled (explore);
    otherwise the arm with the lowest observed mean loss is pulled (exploit).
    Arms that have never been pulled are exploited first, in index order, so
    every arm gets at least one observation before mean losses are compared.

    Randomness comes from NumPy's global ``np.random`` state, so
    ``np.random.seed`` controls reproducibility.

    Attributes:
        n_arms: Number of arms.
        epsilon: Exploration probability in ``[0, 1]``.
        verbose: Whether ``select_arm`` and ``update`` print a trace line.
        arm_counts: Float array holding the number of pulls per arm.
        arm_loss: Float array holding the summed loss per arm.
    """

    def __init__(self, n_arms, epsilon=0.3, *, verbose=True):
        """Create a bandit with ``n_arms`` arms and no observations.

        Args:
            n_arms: Number of arms; must be at least 1.
            epsilon: Probability of exploring on each selection.
            verbose: Print a trace line on every selection and update.

        Raises:
            ValueError: If ``n_arms`` is below 1 or ``epsilon`` is outside
                ``[0, 1]`` (this also rejects NaN).
        """
        if n_arms < 1:
            raise ValueError(f"n_arms must be at least 1, got {n_arms}")
        if not 0.0 <= epsilon <= 1.0:
            raise ValueError(f"epsilon must be in [0, 1], got {epsilon}")
        self.n_arms = n_arms  # Number of arms
        self.epsilon = epsilon  # Exploration rate
        self.verbose = verbose  # Emit trace output when True
        # Float dtype is kept on purpose so printed pull counts stay "1.0".
        self.arm_counts = np.zeros(n_arms)  # Count of pulls for each arm
        self.arm_loss = np.zeros(n_arms)  # Total loss for each arm

    def select_arm(self) -> int:
        """Select an arm based on the ε-greedy strategy.

        Returns:
            Index of the chosen arm as a plain ``int``.
        """
        if np.random.rand() < self.epsilon:  # Explore
            chosen_arm = int(np.random.randint(self.n_arms))
            if self.verbose:
                print(f"Exploring: Selected arm {chosen_arm}")
            return chosen_arm

        # Exploit. Unpulled arms have no mean yet; try them first rather than
        # scoring them as 0, which would hide them behind any negative mean.
        untried = np.flatnonzero(self.arm_counts == 0)
        if untried.size:
            chosen_arm = int(untried[0])
        else:
            # Every count is >= 1 here, so the division is exact and safe.
            avg_losses = self.arm_loss / self.arm_counts
            chosen_arm = int(np.argmin(avg_losses))
        if self.verbose:
            print(f"Exploiting: Selected arm {chosen_arm} with min avg loss")
        return chosen_arm

    def update(self, chosen_arm, loss):
        """Record one pull of ``chosen_arm`` that incurred ``loss``.

        Args:
            chosen_arm: Index of the pulled arm, ``0 <= chosen_arm < n_arms``.
            loss: Observed loss for this pull.

        Raises:
            IndexError: If ``chosen_arm`` is out of range. Negative indices
                are rejected because NumPy would otherwise wrap them onto a
                different arm silently.
        """
        if not 0 <= chosen_arm < self.n_arms:
            raise IndexError(
                f"chosen_arm must be in [0, {self.n_arms}), got {chosen_arm}"
            )
        self.arm_counts[chosen_arm] += 1
        self.arm_loss[chosen_arm] += loss
        if self.verbose:
            print(f"Updated losses for arm {chosen_arm}: Total loss = {self.arm_loss[chosen_arm]}, Pull count = {self.arm_counts[chosen_arm]}")

# Demo: build a five-armed ε-greedy bandit (30% exploration) that prints
# every selection and update it performs.
n_arms = 5
mab = MAB(epsilon=0.3, n_arms=n_arms)

# Play ten rounds. Each round picks an arm first, then draws a stand-in loss
# uniformly from [0, 1) and feeds it back to the bandit.
n_rounds = 10
for _ in range(n_rounds):
    chosen_arm = mab.select_arm()
    loss = np.random.random()
    mab.update(chosen_arm, loss)

    

Exploiting: Selected arm 0 with min avg loss
Updated losses for arm 0: Total loss = 0.9602702279161014, Pull count = 1.0
Exploiting: Selected arm 1 with min avg loss
Updated losses for arm 1: Total loss = 0.05996218923446206, Pull count = 1.0
Exploiting: Selected arm 2 with min avg loss
Updated losses for arm 2: Total loss = 0.11864004806287531, Pull count = 1.0
Exploiting: Selected arm 3 with min avg loss
Updated losses for arm 3: Total loss = 0.480284135234188, Pull count = 1.0
Exploiting: Selected arm 4 with min avg loss
Updated losses for arm 4: Total loss = 0.5856872384672509, Pull count = 1.0
Exploiting: Selected arm 1 with min avg loss
Updated losses for arm 1: Total loss = 1.0430345681882147, Pull count = 2.0
Exploiting: Selected arm 2 with min avg loss
Updated losses for arm 2: Total loss = 0.8984483904339121, Pull count = 2.0
Exploring: Selected arm 3
Updated losses for arm 3: Total loss = 1.0244212387826221, Pull count = 2.0
Exploring: Selected arm 2
Updated losses for arm 2