In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports all modules before
# each cell runs, so edits to the local modules imported below (`discovery`,
# `discoveryV2`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from time import time

from tqdm import tqdm
from discovery import Chen, LLR, UCB1

from discoveryV2 import Chen as ChenSUB
from discoveryV2 import LLR as LLRSUB

In [3]:
# Seed the global NumPy RNG so every run of the notebook is reproducible
np.random.seed(8262022)

# --- Problem size -----------------------------------------------------------
M = 1000        # number of quadrants
K = 50          # number of arms (police units)
T_max = 10000   # number of time periods
p = 0           # probability of perfectly observing crime in unvisited quadrants

# --- Ground truth -----------------------------------------------------------
# Mean crime per quadrant: integers drawn from [4, 100)
mu_reales = np.random.randint(4, 100, M)

# rho is the success probability of the binomial. N could be any value; a
# round, interpretable number is used.
N = 1000
rho = mu_reales / N

# One uniform draw per quadrant
# (presumably a per-quadrant reporting probability -- confirm against `discovery`)
q = np.random.uniform(size=M)
# Constant alternative: q = .5 * np.ones(M)

# --- Stopping rule ----------------------------------------------------------
variance_tolerance = 0.1
distance_tolerance = 0.1

use_distance = True


def distnace_f(x_1, x_2):
    """Return the Euclidean (L2) distance between two vectors.

    The misspelled name is kept on purpose: other cells call `distnace_f`.
    """
    return np.linalg.norm(x_1 - x_2)

In [4]:
# Baseline learners from `discovery`, all built from the same problem setup.
# NOTE(review): these constructors receive (q, rho) while the Chen cell passes
# (rho, q) -- confirm both orders against the class signatures.
llr = LLR(
    M, K, N, q, rho,
    underreporting=False,
)
ucb1 = UCB1(M, K, N, q, rho)

# LLR variant imported from `discoveryV2`
llr_sub = LLRSUB(
    M, K, N, q, rho,
    underreporting=False,
)

# Chen

In [5]:
chen = Chen(M, K, N, rho, q)

# Total number of times each arm has been played so far (one entry per arm)
T_i = np.zeros(M)

# Current mean-outcome estimates, one per arm
mu_hat = np.ones(M)

# Per-round buffers, preallocated for at most T_max rounds (row r <-> round r + 1).
# X_T is handed to `chen.update_mu_hat_t_i` (presumably filled with the observed
# outcomes there -- confirm); S_T stores the arm selection of each round.
X_T = np.zeros((T_max, M))
S_T = np.zeros((T_max, M))
historical_mu_hat = np.zeros((T_max, M))
historical_mu_bar = np.zeros((T_max, M))

t = 0
# The loop is capped at T_max because the buffers above only have T_max rows;
# without the cap a run that never meets the tolerance ends in an IndexError.
# If t == T_max on exit, the budget was exhausted: check convergence manually.
while t < T_max:
    if use_distance:
        # Distance between the current estimate and the true parameters
        keep_going = distnace_f(mu_hat, rho) > distance_tolerance
    else:
        # Change with respect to the previous round's estimate, which lives in
        # row t - 1 (row t has not been written yet and is still all zeros).
        # At t == 0 there is no previous estimate, so always run one round.
        keep_going = t == 0 or \
            distnace_f(mu_hat, historical_mu_hat[t - 1]) > variance_tolerance
    if not keep_going:
        break

    t += 1

    # Save the estimate this round starts from
    historical_mu_hat[t - 1] = mu_hat

    # Update rule: adjusted means for the current round
    mu_bar = chen.update_rule(t, mu_hat, T_i)
    historical_mu_bar[t - 1] = mu_bar

    # Oracle: choose which arms to play given the adjusted means
    S = chen.oracle(mu_bar)

    # Rounds are 1-based, buffer rows are 0-based
    S_T[t - 1] = S
    T_i[S.astype(bool)] += 1

    # Refresh mu_hat from the outcomes of the arms just played
    mu_hat = chen.update_mu_hat_t_i(S, X_T, t, S_T)

# Keep only the rounds that were actually played. T_i is indexed by arm, not by
# round, so it is stored whole (slicing it with [:t] would drop arms when t < M).
res_dict = {
    'X_T': X_T[:t],
    'S_T': S_T[:t],
    'T_i': T_i,
    'historical_mu_hat': historical_mu_hat[:t],
    't': t}

chen.set_results_dictionary(res_dict)

# Drop the notebook-level names; res_dict still references the underlying arrays
del X_T, S_T, T_i, historical_mu_hat


In [5]:
# Display the first ten true success probabilities (rho = mu_reales / N)
rho[:10]

array([0.096, 0.06 , 0.078, 0.045, 0.062, 0.072, 0.005, 0.031, 0.048,
       0.05 ])

In [6]:
# First ten entries of the last stored mu_hat row, for comparison with rho.
# Rows are saved at the start of each round, so this is the estimate the final
# round started from. `chen.results` is presumably the dictionary passed to
# `set_results_dictionary` -- confirm in `discovery`.
chen.results.get('historical_mu_hat')[-1][:10]

array([0.0945    , 0.06133333, 0.0814    , 0.04566667, 0.06277778,
       0.0696    , 0.006     , 0.032     , 0.04644444, 0.04888889])