Since $p(\theta|x) \propto p(\theta, x) = p(x|\theta)p(\theta) \propto \theta^{x + \alpha - 1}(1 - \theta)^{n - x + \beta - 1}$, the posterior density has the kernel of a beta distribution, so the posterior is $\theta|x \sim Beta(\alpha' = \alpha + x, \beta' = \beta + n - x)$. We can initialize $\alpha$ and $\beta$ to $1$ (a uniform prior on $[0, 1]$) and update them using $n$ and $x$, where $n$ is the number of tries and $x$ is the number of successes.

In [106]:
from scipy import stats
import numpy as np

def thompson_sampling(prob, n_tries, verbose=True):
    """Run Thompson sampling on a set of Bernoulli options.

    Each option starts with a Beta(1, 1) prior (uniform on [0, 1]). On every
    try, one value of theta is sampled from each option's current posterior
    Beta(1 + successes, 1 + tries - successes); the option with the largest
    sampled value is played, and its counters are updated with the outcome.

    Args:
        prob: Sequence with the true success probability of each option.
            Any number of options (at least one) is supported.
        n_tries: Total number of tries to perform across all options.
        verbose: If True, print the index of the chosen option on every try.

    Returns:
        A tuple ``(tries, successes)`` of two lists, each with one entry per
        option: how many times the option was played and how many of those
        plays were successes.

    Raises:
        ValueError: If ``prob`` is empty.
    """
    if len(prob) == 0:
        raise ValueError("prob must contain at least one option")

    tries = [0] * len(prob)
    successes = [0] * len(prob)

    for _ in range(n_tries):
        # Sample one theta per option from its posterior. Options are visited
        # in index order so the sequence of random draws does not depend on
        # how many options there are beyond their position in `prob`.
        samples = [
            stats.beta.rvs(1 + won, 1 + played - won, size=1)[0]
            for won, played in zip(successes, tries)
        ]

        # Play the option whose sampled theta is the highest
        # (ties resolve to the lowest index).
        choice = int(np.argmax(samples))

        # Print to see what choices are being made.
        if verbose:
            print(choice)

        # Update the number of tries for the chosen option.
        tries[choice] += 1

        # Simulate the outcome as a single Bernoulli trial with the option's
        # true probability, and count it if it is a success.
        if stats.binom.rvs(1, prob[choice], size=1)[0] == 1:
            successes[choice] += 1

    return tries, successes


# Set random seed (comment out to see different result for each run)
np.random.seed(1007372843)

# True probabilities of each option. There are three options.
prob = [0.25, 0.5, 0.75]

# Total number of tries
N = 100

# n is the number of tries for each option and x is the number of successes
# for each option, both after all N tries.
n, x = thompson_sampling(prob, N)

# Display number of tries for each option
n

1
0
2
2
2
0
2
0
0
2
2
0
2
2
2
0
2
2
0
1
2
2
0
0
2
2
2
1
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
2
2


[9, 4, 87]