From b2eba75a5f3c951c8b558edfdfde7d375de510a1 Mon Sep 17 00:00:00 2001 From: norases Date: Fri, 22 Aug 2014 11:45:19 -0700 Subject: [PATCH] Updated bandit BLA documentation for get_bla_payoff function. --- moe/bandit/bla.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/moe/bandit/bla.py b/moe/bandit/bla.py index 4d3551f9..ce3aca52 100644 --- a/moe/bandit/bla.py +++ b/moe/bandit/bla.py @@ -51,11 +51,18 @@ def __init__( def get_bla_payoff(self, sampled_arm): r"""Compute the BLA payoff using the BLA subtype formula. - BLA payoff is computed by sampling from a beta distribution :math`Beta(\alpha, \beta)` + BLA payoff is computed as follows: + + .. math:: r_j = Sample(Beta(\alpha_j, \beta_j)) + + where :math:`\alpha_j` is the number of arm *j* wins + 1 (``sampled_arm.win`` + 1) and + :math:`\beta_j` is the number of arm *j* losses + 1 (``sampled_arm.total`` - ``sampled_arm.win`` + 1). + + In other words, BLA payoff is computed by sampling from a beta distribution :math:`Beta(\alpha, \beta)` with :math:`\alpha = number\_wins + 1` and :math:`\beta = number\_losses + 1 = number\_total - number\_wins + 1`. - Note that for an unsampled_arm, :math`Beta(1, 1)` is a uniform distribution. + Note that for an unsampled arm, :math:`Beta(1, 1)` is a uniform distribution. Learn more about beta distribution at http://en.wikipedia.org/wiki/Beta_distribution. :param sampled_arm: a sampled arm