# Epsilon-greedy algorithm

In [35]:
#Init arms
class BernoulliArm():
    """Bandit arm whose reward is either 0.0 or 1.0.

    ``p`` is the threshold a uniform sample from ``np.random.random()`` is
    compared against: the arm pays 1.0 unless the sample is strictly
    greater than ``p``.
    """

    def __init__(self, p):
        # Stored as given; no range check is performed.
        self.p = p

    def draw(self):
        """Pull the arm once and return the reward (0.0 or 1.0)."""
        sample = np.random.random()
        return 0.0 if sample > self.p else 1.0

In [36]:
import numpy as np

## INITIALIZATION
# Exploration threshold: select_arm picks a uniformly random arm whenever a
# uniform sample is not strictly greater than this value.
epsilon = 0.1

# Arm parameters
means = [0.3, 0.4, 0.3, 0.4]  # Threshold `p` handed to each BernoulliArm
# Number of arms. Derived from `means` so the sizes of `counts`/`values`
# below can never drift out of sync with the list of arms.
n = len(means)
arms = [BernoulliArm(mean) for mean in means]

# Simulation state
counts = np.zeros(n, dtype=int)  # Number of times each arm has been played
values = np.zeros(n)  # Running average reward observed for each arm
times = 500  # Number of turns
chosen_arms = [0.0 for _ in range(times)]  # Per-turn log of the arm played
rewards = [0.0 for _ in range(times)]  # Per-turn log of the reward received
cumulative_rewards = 0.0  # Sum of all rewards collected so far

In [37]:
# Do the e-greedy algorithm
def select_arm(t):
    """Return the index of the arm to play at turn ``t`` (epsilon-greedy).

    Reads the module-level ``epsilon`` and ``values``. One uniform sample
    decides between exploiting and exploring; on turn 0 the choice is
    always random, even when the sample says "exploit".
    """
    exploit = np.random.random() > epsilon
    if exploit and t != 0:
        # Exploitation: arm with the highest running average so far.
        return np.argmax(values)
    # Exploration (or first turn): uniformly random arm.
    return np.random.randint(len(values))

In [38]:
for t in range(times):
    # Choose an arm with the e-greedy rule and sample its reward.
    chosen_arm = select_arm(t)
    reward = arms[chosen_arm].draw()

    # Record this turn and accumulate the total reward.
    chosen_arms[t] = chosen_arm
    rewards[t] = reward
    cumulative_rewards += reward

    # The played arm has now been used one more time.
    counts[chosen_arm] += 1
    _n = counts[chosen_arm]

    # Update the empirical mean of the played arm: weight the previous
    # average by (_n - 1) / _n and the new reward by 1 / _n.
    value = values[chosen_arm]
    old_weight = (_n - 1) / float(_n)
    new_weight = 1 / float(_n)
    new_value = old_weight * value + new_weight * reward
    values[chosen_arm] = new_value

# Report the number of turns, the per-turn logs and the total reward.
for item in (times, chosen_arms, rewards, cumulative_rewards):
    print(item)

500
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3, 1, 3, 3, 3, 3, 3, 2, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 0, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 1, 3, 3, 3, 3, 3, 2, 0, 3, 3, 3, 2, 3, 3, 3, 3, 3, 0, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3,