# Softmax

In [195]:
#Init arms
class BernoulliArm:
    """Bandit arm that pays 1.0 with probability ``p`` and 0 otherwise."""

    def __init__(self, p):
        # Probability that a single pull is rewarded.
        self.p = p

    def draw(self):
        """Pull the arm once and return the reward (1.0 on a hit, 0 on a miss)."""
        # One uniform sample per pull; a sample strictly above p is a miss.
        sample = np.random.random()
        return 0 if sample > self.p else 1.0

In [196]:
import numpy as np

In [197]:
#Arms parameters
means = [0.3, 0.9]  # Success probability of each arm
# Number of arms, derived from `means` so the two can never disagree.
n = len(means)
# One Bernoulli arm per configured success probability.
arms = [BernoulliArm(mean) for mean in means]

In [198]:
# Simulation
times = 500  # Number of turns to simulate
counts = np.zeros(n, dtype=int)  # How many times each arm has been played
values = np.ones(n)  # Running average reward per arm; every arm starts at 1.0
# Per-turn logs, pre-filled with 0.0 and overwritten as the simulation runs.
chosen_arms = [0.0] * times
rewards = [0.0] * times
cumulative_rewards = 0.0  # Sum of all rewards collected so far

In [199]:
def softmax(w, t=0.1):
    """Convert value estimates into a probability distribution (softmax).

    Args:
        w: Array-like of value estimates, one entry per arm.
        t: Temperature dividing the values before exponentiation. Smaller
            values concentrate the probability mass on the largest entries.
            Defaults to 0.1.

    Returns:
        A NumPy array with the same shape as ``w`` whose entries sum to 1
        (an empty array when ``w`` is empty).

    Raises:
        ValueError: If ``t`` is not strictly positive.
    """
    if t <= 0:
        raise ValueError("temperature t must be positive")
    scaled = np.asarray(w, dtype=float) / t
    if scaled.size == 0:
        return scaled
    # Subtracting the maximum does not change the ratios but keeps np.exp
    # from overflowing to inf (which would turn the result into nan).
    e = np.exp(scaled - np.max(scaled))
    return e / np.sum(e)

def weighted_choice(weights):
    """Draw a random index, each index being picked in proportion to its weight.

    The weights do not need to be normalised: the random threshold is scaled
    by their total before it is located in the cumulative sums.
    """
    cumulative = np.cumsum(weights)
    # Uniform point in [0, total), then the first bucket whose cumulative
    # weight reaches it.
    threshold = np.random.rand() * cumulative[-1]
    return np.searchsorted(cumulative, threshold)

 
# Pick an arm at random according to the supplied selection weights
def select_arm(values):
    """Return the index of the arm sampled by ``weighted_choice(values)``."""
    return weighted_choice(values)

In [200]:
# Play `times` turns: turn the current value estimates into selection
# probabilities, sample an arm, observe its reward and update the estimate.
for t in range(times):
    probabilities = softmax(values)
    chosen_arm = select_arm(probabilities)
    reward = arms[chosen_arm].draw()

    # Keep a per-turn record of what was played and what it paid.
    chosen_arms[t] = chosen_arm
    rewards[t] = reward

    # Running mean of the chosen arm: weight the previous estimate by
    # (pulls - 1) / pulls and the new reward by 1 / pulls.
    counts[chosen_arm] += 1
    pulls = counts[chosen_arm]
    previous = values[chosen_arm]
    values[chosen_arm] = ((pulls - 1) / float(pulls)) * previous + (1 / float(pulls)) * reward

    cumulative_rewards += reward

# Summary of the run: number of turns, arms played, rewards, total reward.
print(times)
print(chosen_arms)
print(rewards)
print(cumulative_rewards)

500
[0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1,