In [1]:
# Import modules
import numpy as np
import pandas as pd
import math
import random
import matplotlib.pyplot as plt
from matplotlib import style

from MAB.bandit import GaussianMultiArmedBandit, BinomialMultiArmedBandit, BernoulliMultiArmedBandit
from MAB.policy import EpsGreedyPolicy, GreedyPolicy, RandomPolicy, UCBPolicy 

In [2]:
# Gaussian test bed with five arms; the bandit is built from the per-arm
# `mu` and `sigma` lists below (one entry per arm).
mu = [0, 4, 2, 3, 5]
sigma = [0.5, 0.4, 0.2, 0.8, 0.7]
n_arms = 5
bandit = GaussianMultiArmedBandit(n_arms, mu, sigma)

# Smoke test: pull a uniformly chosen arm `n_trials` times and print the
# (action, reward) pair for each pull.
# NOTE(review): no seed is set in the visible cells, so the printed sequence
# is expected to differ from run to run — confirm whether that is intended.
n_trials = 10
for _ in range(n_trials):
    x = random.randrange(n_arms)  # arm index in 0 .. n_arms-1, same as randint(0, n_arms-1)
    print(f"Action: {x}, Reward: {bandit.pull(x)}")

Gaussian Multi-Armed Bandit Initialized
Arm_0 - mu: 0   sigma: 0.5
Arm_1 - mu: 4   sigma: 0.4
Arm_2 - mu: 2   sigma: 0.2
Arm_3 - mu: 3   sigma: 0.8
Arm_4 - mu: 5   sigma: 0.7

Action: 0, Reward: 0.0
Action: 4, Reward: 5.291599901395978
Action: 0, Reward: 0.506369005081689
Action: 4, Reward: 4.503517975460855
Action: 0, Reward: 0.0
Action: 1, Reward: 3.875728578953362
Action: 2, Reward: 1.8434718465245266
Action: 3, Reward: 3.0225686163035466
Action: 0, Reward: 0.0
Action: 3, Reward: 3.837293508899897


In [3]:
# Fixed value estimates handed to the policy, one per arm.
Qs = [0, 4, 2, 3, 5]

# Epsilon-greedy policy constructed with eps = 0.5.
eps = 0.5
policy = EpsGreedyPolicy(eps)

# Ask the policy for an action `n_trials` times against the same estimates
# and print each choice with a 1-based trial number.
n_trials = 10
for i in range(n_trials):
    action = policy.get_action(Qs)
    print(f"Trial: {i+1}, Action: {action}")

Trial: 1, Action: 4
Trial: 2, Action: 4
Trial: 3, Action: 4
Trial: 4, Action: 4
Trial: 5, Action: 4
Trial: 6, Action: 4
Trial: 7, Action: 4
Trial: 8, Action: 4
Trial: 9, Action: 4
Trial: 10, Action: 2


In [4]:
# UCB policy constructed with c = 2.
# NOTE(review): `c` is presumably the exploration weight — confirm in MAB.policy.
c = 2
policy = UCBPolicy(c)

# Fixed value estimates and per-arm selection counts (all arms start at zero).
Qs = [0, 4, 2, 3, 5]
Ns = [0] * len(Qs)

# Query the policy repeatedly; only the counts change between trials because
# the estimates in `Qs` are never updated in this cell.
n_trials = 10
for i in range(n_trials):
    action = policy.get_action(Qs, Ns)
    Ns[action] = Ns[action] + 1  # record that this arm was selected once more
    print(f"Trial: {i+1}, Action: {action}")

Trial: 1, Action: 0
Trial: 2, Action: 1
Trial: 3, Action: 2
Trial: 4, Action: 3
Trial: 5, Action: 4
Trial: 6, Action: 4
Trial: 7, Action: 4
Trial: 8, Action: 4
Trial: 9, Action: 4
Trial: 10, Action: 4
