In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from typing import Optional, Sequence

import numpy as np

from coba import CobaRandom, Environments, RandomLearner, Experiment, VowpalBagLearner
from coba.environments import LambdaSimulation
from coba.experiments.results import CustomResult
from coba.experiments.tasks import SimpleEvaluation
from coba.primitives import Context, Action

# Run Simulation

In [None]:
class CustomEnvironment(LambdaSimulation):
    """
    Synthetic two-action simulation built on LambdaSimulation.

    Each interaction has a context with three random features and the two actions 0 and 1.
    Rewards are Bernoulli draws: the success probability of action 0 depends on feature_1
    and feature_2, the success probability of action 1 is always 0.5, and feature_3 does
    not influence any reward.

    NOTE(review): both random generators are stateful and advance on every call, so if the
    base class invokes the callbacks for more than one pass over the simulation, later
    passes will see different draws than the first one -- confirm this is acceptable.
    """

    def __init__(self, n_interactions: Optional[int] = 1000, seed: int = 1):
        """
        Args:
            n_interactions: Number of interactions, forwarded unchanged to LambdaSimulation.
            seed: Seed for the context generator and the reward generator. The default of 1
                keeps the context stream identical to the previously hard-coded CobaRandom(1).
        """
        # Create the generators *before* handing the bound callbacks to the base class so
        # the callbacks are usable no matter when LambdaSimulation first invokes them.
        self.r = CobaRandom(seed)
        # Dedicated seeded generator for rewards; drawing from the global numpy state made
        # results differ between otherwise identical runs.
        self._reward_rng = np.random.default_rng(seed)
        super().__init__(n_interactions, self.context, self.actions, self.rewards)

    def actions(self, index: int, context: Context) -> Sequence[Action]:
        """
        Return the valid actions for an interaction.

        The action set does not depend on `index` or `context`: it is always [0, 1].
        """
        return [0, 1]

    def context(self, index: int) -> Context:
        """
        Return the context for an interaction as a dict of three named features.

        Every feature is a fresh draw from `self.r.randoms`; `index` is not used.
        (The reward formula centers the features at 0.5, i.e. presumably they lie in [0, 1].)
        """
        return {
            "feature_1": self.r.randoms(1)[0],
            "feature_2": self.r.randoms(1)[0],
            "feature_3": self.r.randoms(1)[0]
        }

    def rewards(self, index: int, context: Context, action: Action) -> float:
        """
        Return a Bernoulli reward (0.0 or 1.0) for taking `action` in `context`.

        The success probability for action 0 is centered around 0.5, with feature_1 strongly
        positively correlating with it and feature_2 half as much negatively correlating.
        The success probability for action 1 is a constant 0.5.
        """
        reward_probabilities_for_actions = [
            0.5
                + 1.0 * (context["feature_1"] - 0.5)
                - 0.5 * (context["feature_2"] - 0.5),
            0.5
        ]

        # The linear formula can leave [0, 1]; clip so it is a valid probability.
        reward_probabilities_for_actions = [min(1.0, max(0.0, prob)) for prob in reward_probabilities_for_actions]

        # Seeded draw, converted to a plain Python float to match the declared return type.
        return float(self._reward_rng.binomial(1, reward_probabilities_for_actions[action]))

# One 5000-interaction source environment, expanded through Environments.shuffle(n=4).
environments = Environments([CustomEnvironment(5000)]).shuffle(n=4)

# Every VowpalBagLearner uses the same feature specification; only epsilon varies.
vw_features = [1, 'x', 'a', 'axx']
explicit_epsilons = [0.01, 0.05, 0.1, 0.2]

learners = [
    # First learner relies on the library's default epsilon.
    VowpalBagLearner(features=list(vw_features)),
    # One learner per explicitly chosen epsilon, in increasing order.
    *(VowpalBagLearner(features=list(vw_features), epsilon=eps) for eps in explicit_epsilons),
    # Baseline that is appended last, after all VowpalBagLearner instances.
    RandomLearner(),
]

# Record these per-interaction fields so they are available on the result afterwards.
evaluation = SimpleEvaluation(record=['reward','probability','action','context'])

result = Experiment(environments, learners, evaluation_task=evaluation).run()

# Rebind the result object's class so the methods defined on CustomResult can be called on it.
result.__class__ = CustomResult

In [None]:
# Plot the experiment result per learner (presumably a performance comparison of the
# learners configured above -- see the result class for the exact metric shown).
result.plot_learners()

In [None]:
# Convert the recorded interactions to a pandas DataFrame and draw a histogram
# of its 'reward' column.
df = result.interactions.to_pandas()
df.hist(column='reward')

In [None]:
# NOTE(review): plot_overview is not defined in this notebook; it is expected to come from
# CustomResult, which `result` was re-classed to after the experiment ran -- confirm there.
result.plot_overview()

In [None]:
# NOTE(review): eval_metrics is not defined in this notebook; it is expected to come from
# CustomResult. As the cell's last expression, its return value is displayed as the output.
result.eval_metrics()