# Simulation of Multi-Armed Bandit Algorithms
## Imports

In [6]:
from mab import *
import numpy as np
import plotly.graph_objects as go
import random
random.seed(30)

In [7]:
def plot(results, metric="accuracy", title=""):
    """Build a plotly line chart comparing one metric across experiments.

    Args:
        results: iterable of result dictionaries, one per experiment. Each
            must hold the series to draw under the ``metric`` key and the
            trace label under the ``'algorythm'`` key.
        metric: key of the series to plot from every result dictionary.
        title: chart title.

    Returns:
        A ``go.Figure`` with one line trace per entry of ``results``.
    """
    fig = go.Figure()
    for res in results:
        series = np.asarray(res[metric])
        # The x axis is derived from the series itself instead of the
        # notebook-global `horizon`, so the helper does not depend on hidden
        # state and x/y always have matching lengths.
        steps = np.arange(len(series))
        fig.add_trace(go.Scatter(x=steps,
                                 y=series,
                                 mode='lines', name=res['algorythm']))

    fig.update_layout(
        title=title,
        autosize=True,
    )
    return fig

## Test Design

In [8]:
# One [first, second] argument pair per arm, passed positionally to UniformArm.
distribution_params = [[0, 0.5], [0, 0.5], [0, 0.1], [0, 0.8], [0, 1]]
arms = [UniformArm(first, second) for first, second in distribution_params]
n_arms = len(distribution_params)

# Simulation size: steps per simulation and number of simulations,
# as passed to test_alorythm below.
horizon = 250
num_sims = 5000

In [4]:
# Policies under comparison, every one configured for `n_arms` arms.
# Order is significant: the plotting cells that slice `results[:-1]`
# leave out the last entry (AB).
algorythms = [
    *(EpsilonGreedy(value, n_arms=n_arms) for value in (0.1, 0.5, 0.9)),
    *(Softmax(value, n_arms=n_arms) for value in (0.1, 0.5)),
    RandomSelect(n_arms=n_arms),
    AnnealingSoftmax(n_arms=n_arms),
    UCB1(n_arms=n_arms),
    AB(n_arms=n_arms),
]

In [9]:
# Run each algorithm on the same arms and attach the derived metrics to the
# result dictionary returned by the simulator.
results = []
for algo in algorythms:
    res = test_alorythm(algo, arms, num_sims, horizon)
    res.update({
        'algorythm': algo.name,
        # NOTE(review): 4 is the index of the [0, 1] arm in
        # `distribution_params` - presumably the reference (best) arm;
        # confirm against Metric.accuracy.
        'accuracy': Metric.accuracy(
            4, res['times'], res['chosen_arms'], num_sims),
        'average_reward': Metric.average_reward(
            res['times'], res['rewards'], num_sims),
        'cumulative_reward': Metric.cumulative_reward(
            res['times'], res['cumulative_rewards'], num_sims),
    })
    results.append(res)

In [10]:
# Accuracy chart for every algorithm except the last one in `results`.
fig = plot(
    results[:-1],
    metric="accuracy",
    title="Accuracy",
)
fig.show()

In [6]:
# Average-reward chart for every algorithm except the last one in `results`.
fig = plot(
    results[:-1],
    metric="average_reward",
    title="Average Reward",
)
fig.show()

In [None]:
# Cumulative-reward chart for all algorithms.
# The computed metric is stored under 'cumulative_reward'; the key
# 'cumulative_rewards' holds the simulator's raw output that the metric is
# computed from, so plotting it would not show the derived series.
fig = plot(results, metric="cumulative_reward", title="Cumulative Rewards")
# Optional reference trace, left disabled:
#fig.add_trace(go.Scatter(x=np.array(range(horizon)),
#                         y=np.array(range(horizon)),
#                         mode='lines', name="Best Possible"))
fig.show()