In [1]:
import numpy as np
import pandas as pd

In [75]:
import numpy as np

# 多臂老虎机类
class MultiArmsBandit:
    """Multi-armed bandit whose arms pay out normally distributed rewards.

    The constructor arguments are stored as given (no copying or validation);
    ``means`` and ``stds`` are indexed by arm when an arm is pulled.
    """

    def __init__(self, arms, means, stds):
        """Record the bandit configuration.

        Args:
            arms: Number of arms.
            means: Indexable collection giving the reward mean of each arm.
            stds: Indexable collection giving the reward standard deviation
                of each arm.
        """
        self.arms, self.means, self.stds = arms, means, stds

    def pull(self, arm):
        """Draw one reward for ``arm`` from its normal distribution.

        Args:
            arm: Index into ``means`` / ``stds``.

        Returns:
            A single sample from ``np.random.normal`` (global NumPy RNG).
        """
        loc = self.means[arm]
        scale = self.stds[arm]
        return np.random.normal(loc=loc, scale=scale)


# ϵ-贪心算法类
class EpsilonGreedy:
    """Epsilon-greedy agent keeping a running sample-mean estimate per arm.

    Attributes:
        bandit: Environment object; it must expose ``arms`` (the number of
            arms) and ``pull(arm)`` returning a numeric reward.
        epsilon: Probability of picking a uniformly random arm on a selection.
        counts: Float array with the number of pulls recorded per arm.
        values: Float array with the current reward estimate per arm; every
            entry starts at ``initial_value``.
    """

    def __init__(self, bandit, epsilon=0.1, initial_value=0.1):
        """Create the agent.

        Args:
            bandit: Environment to interact with (see class docstring).
            epsilon: Exploration probability.
            initial_value: Starting estimate assigned to every arm. It is
                replaced by the first observed reward once an arm is pulled.
        """
        self.bandit = bandit
        self.epsilon = epsilon
        self.counts = np.zeros(bandit.arms)
        # Force a float dtype: with an integer ``initial_value`` (e.g. 0)
        # np.full would build an integer array and every estimate written by
        # ``update`` would be silently truncated.
        self.values = np.full(bandit.arms, initial_value, dtype=float)

    def select_arm(self):
        """Choose the next arm to pull.

        With probability ``epsilon`` a uniformly random arm is explored.
        Otherwise the arm with the highest estimate is exploited, and ties
        between several best arms are broken uniformly at random so that no
        arm is favoured merely because of its index. This also covers the
        start-up situation where all estimates are equal.

        Returns:
            int: Index of the selected arm.
        """
        if np.random.random() < self.epsilon:
            return int(np.random.randint(self.bandit.arms))  # explore

        best_arms = np.flatnonzero(self.values == self.values.max())
        if best_arms.size == 0:
            # Only reachable when the maximum is NaN (NaN != NaN); fall back
            # to argmax instead of failing on an empty candidate set.
            return int(np.argmax(self.values))
        return int(np.random.choice(best_arms))  # exploit, random tie-break

    def update(self, arm, reward):
        """Fold ``reward`` into the running mean estimate of ``arm``.

        Args:
            arm: Index of the arm that was pulled.
            reward: Reward observed for that pull.
        """
        self.counts[arm] += 1
        n = self.counts[arm]
        # Incremental mean: new = old + (reward - old) / n. For n == 1 this
        # overwrites the initial value with the first reward.
        self.values[arm] = self.values[arm] + (reward - self.values[arm]) / n

    def run(self, n_trials):
        """Play ``n_trials`` rounds of select / pull / update.

        Args:
            n_trials: Number of rounds to play.

        Returns:
            Sum of all rewards obtained (``0`` when no round is played).
        """
        total_reward = 0
        for _ in range(n_trials):
            arm = self.select_arm()
            reward = self.bandit.pull(arm)
            self.update(arm, reward)
            total_reward += reward
        return total_reward
    
    


In [80]:
# 示例使用
if __name__ == "__main__":
    # Bandit configuration: number of arms plus per-arm reward parameters.
    arms = 3
    means = [0.2, 0.1, 0.3]  # assumed reward means
    stds = [0.02, 0.01, 0.03]  # assumed reward standard deviations

    # Build the environment, then the epsilon-greedy agent acting on it.
    # epsilon=0.9 makes 90% of the selections uniformly random exploration.
    bandit = MultiArmsBandit(arms=arms, means=means, stds=stds)
    epsilon_greedy = EpsilonGreedy(bandit=bandit, epsilon=0.9)

    # Run the experiment and keep the accumulated reward.
    n_trials = 50_000
    total_reward = epsilon_greedy.run(n_trials=n_trials)

    # Report the total reward, the per-arm estimates and the pull counts.
    print(f"Total reward after {n_trials} trials: {total_reward}")
    print(f"Average reward per arm: {epsilon_greedy.values}")
    print(f"Number of times each arm was pulled: {epsilon_greedy.counts}")

Total reward after 50000 trials: 10480.04204289335
Average reward per arm: [0.20024559 0.10004513 0.30005972]
Number of times each arm was pulled: [14941. 15157. 19902.]
