# Simulation Example

## Imports

In [36]:
from mab import BetaTS
from mab import BernoulliArm, EpsilonGreedy, AB, RandomSelect, montecarlo_simulation, Metric, UCB1, AnnealingSoftmax
from mab import plot
import random
# Fix the stdlib RNG seed so repeated runs of the notebook give the same draws.
# NOTE(review): this only helps if `mab` samples via Python's `random` module
# (not e.g. numpy's generator) — confirm against the library.
random.seed(30)

## Simulation design
We use a Bernoulli simulation for this case. We set up a vector `mu` with one entry per arm; each entry is the probability that the arm yields a reward.

In [37]:
# Per-arm reward probabilities; one BernoulliArm is built for each entry.
mu = [0.01, 0.01, 0.4, 0.1, 0.09]
arms = [BernoulliArm(p) for p in mu]
n_arms = len(mu)

# Simulation size, handed to montecarlo_simulation further down.
# presumably: num_sims independent runs, each of `horizon` steps — TODO confirm.
num_sims = 1000
horizon = 2500

## Algorithms

In [41]:
# Strategies under comparison; every one is constructed for the same arm count.
algorythms = [
    RandomSelect(n_arms=n_arms),
    # presumably the positional 0.8 is epsilon (exploration rate) — TODO confirm.
    EpsilonGreedy(0.8, n_arms=n_arms),
    BetaTS(n_arms=n_arms),
    # Kept last on purpose: two of the plots below drop it via results[:-1].
    AB(n_arms=n_arms),
]

## Monte Carlo Simulation

In [42]:
# Run each algorithm through the Monte Carlo simulation and collect one
# metrics dict per algorithm, in the same order as `algorythms`.
results = []
for algo in algorythms:
    res = montecarlo_simulation(algo, arms, num_sims, horizon)
    metrics = {
        'algorythm': algo.name,
        # NOTE(review): the literal 2 looks like the index of the best arm
        # (mu[2] = 0.4 is the largest probability) — confirm against
        # Metric.accuracy and keep it in sync if `mu` changes.
        'accuracy': Metric.accuracy(
            2,
            res['times'],
            res['chosen_arms'],
            num_sims,
        ),
        'average_reward': Metric.average_reward(
            res['times'],
            res['rewards'],
            num_sims,
        ),
        'cumulative_reward': Metric.cumulative_reward(
            res['times'],
            res['cumulative_rewards'],
            num_sims,
        ),
    }
    results.append(metrics)

In [44]:
# Accuracy plot. The title previously read "Average Reward" (copied from the
# next cell), which mislabelled the figure; it now matches the plotted metric.
# results[:-1] leaves out the last entry, i.e. the AB algorithm.
plot(results[:-1], metric="accuracy", title="Accuracy").show()

In [46]:
# Average reward for every algorithm except the last entry (AB).
plot(
    results[:-1],
    metric="average_reward",
    title="Average Reward",
).show()

In [47]:
# Cumulative reward for all algorithms, AB included.
plot(
    results,
    metric="cumulative_reward",
    title="Cumulative Rewards",
).show()