In [1]:
from collections import deque
import numpy as np
import matplotlib.pyplot as plt
import random

from bbutils import BetaBernoulli, GenerativeModel

In [2]:
# Fix the seeds of both global random generators used in this notebook so a
# Restart & Run All reproduces the same accuracies and the same random draws.
# NOTE(review): this assumes bbutils draws from numpy's global generator;
# confirm, otherwise its samples will still vary between runs.
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

k = 1000  # Number of classes

# Accuracy the provider reports for each class (uniform on [0, 1)),
# with class 0 pinned to a high advertised accuracy.
theta_provider = np.random.rand(k)
theta_provider[0] = 0.95

# True accuracy of the black box on our data: identical to the provider's
# figures except for class 0, which has degraded.
theta_true = theta_provider.copy()
theta_true[0] = 0.50

In [3]:
# Optional sanity-check figure, currently disabled: three stacked bar charts
# sharing an x-axis that show the true accuracies, the provider's accuracies,
# and their per-class difference. Uncomment the lines below to render it.
# f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
# f.suptitle('Provider vs. True Accuracies')
# ax1.bar(range(k), theta_true)
# ax1.set_ylabel('True')
# ax2.bar(range(k), theta_provider)
# ax2.set_ylabel('Provider')
# ax3.bar(range(k), theta_true - theta_provider)
# ax3.set_ylabel('Difference')

In [None]:
# Draw the pool of labelled outcomes that both strategies will consume.
# NOTE(review): presumably `sample(n)` returns n (class, outcome) pairs as two
# aligned arrays -- confirm against bbutils.
n = 100_000
generative_model = GenerativeModel(theta_true)
categories, observations = generative_model.sample(n)

# Reference value for class 0: the mean of every class-0 outcome in the pool.
# The error curves plotted later are measured against this number.
is_class_zero = categories == 0
theta_mean = np.mean(observations[is_class_zero])

In [None]:
# Bucket the sampled outcomes by class so a strategy can keep drawing from a
# chosen class until that class has no outcomes left.
deques = [deque() for _ in range(k)]
for label, outcome in zip(categories, observations):
    deques[label].append(outcome)

# Baseline: pick a class uniformly at random at every step.
# Per step we record the running class-0 accuracy estimate and the number of
# class-0 outcomes consumed so far.
random_strategy_outcome = np.zeros(n)
random_strategy_times_chosen = np.zeros(n)
class0_successes = 0
class0_draws = 0
for step in range(n):
    # Redraw until the chosen class still has unused outcomes.
    picked = random.randrange(k)
    while not deques[picked]:
        picked = random.randrange(k)

    # pop() takes the most recently appended outcome of that class.
    drawn = deques[picked].pop()
    if picked == 0:
        class0_successes += drawn
        class0_draws += 1

    # The tiny epsilon avoids dividing by zero; the estimate reads 0 until
    # class 0 has been drawn at least once.
    random_strategy_outcome[step] = class0_successes / (class0_draws + 1e-13)
    random_strategy_times_chosen[step] = class0_draws

In [None]:
# Rebuild the per-class outcome buckets (the previous strategy emptied them).
deques = [deque() for _ in range(k)]
for label, outcome in zip(categories, observations):
    deques[label].append(outcome)

# Active strategy backed by a Beta-Bernoulli model over the k classes.
model = BetaBernoulli(k)

# Prior: turn each provider accuracy into `prior_strength` pseudo-observations
# (rounded successes, the rest failures), then add 1 to both counts.
prior_strength = 5
alpha = np.round(theta_provider * prior_strength)
beta = prior_strength - alpha
model._params = np.vstack((alpha + 1, beta + 1)).T

# Per step we record the running class-0 accuracy estimate and the number of
# class-0 outcomes consumed so far.
beta_bernoulli_outcome = np.zeros(n)
beta_bernoulli_times_chosen = np.zeros(n)
class0_successes = 0
class0_draws = 0
for step in range(n):
    # Rank classes by how far a sampled accuracy sits from the provider's
    # claim, largest gap first.
    sampled_theta = model.sample()
    ranked = np.argsort(np.abs(sampled_theta - theta_provider))[::-1]

    # Take the highest-ranked class that still has unused outcomes.
    for picked in ranked:
        if deques[picked]:
            break

    # pop() takes the most recently appended outcome of that class.
    drawn = deques[picked].pop()
    if picked == 0:
        class0_successes += drawn
        class0_draws += 1

    # The tiny epsilon avoids dividing by zero; the estimate reads 0 until
    # class 0 has been drawn at least once.
    beta_bernoulli_outcome[step] = class0_successes / (class0_draws + 1e-13)
    beta_bernoulli_times_chosen[step] = class0_draws

    # Feed the new outcome back into the model before the next step.
    model.update(picked, drawn)

In [None]:
# Cumulative number of class-0 outcomes each strategy has consumed, per step.
ax = plt.gca()
ax.plot(beta_bernoulli_times_chosen, c='blue', label='active')
ax.plot(random_strategy_times_chosen, c='red', label='random')
ax.set_xlabel('Time')
ax.set_ylabel('Number of Correct Selections')
ax.legend()

In [None]:
# Absolute gap between each strategy's running class-0 estimate and the
# reference mean computed from all class-0 outcomes.
active_error = np.abs(beta_bernoulli_outcome - theta_mean)
random_error = np.abs(random_strategy_outcome - theta_mean)

ax = plt.gca()
ax.plot(active_error, c='blue', label='active')
ax.plot(random_error, c='red', label='random')
ax.set_xlabel('Time')
ax.set_ylabel('Absolute Error')
ax.legend()