In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
import itertools
import os
import pandas as pd
import pickle
import random

# Seed every RNG this notebook draws from so that "Restart Kernel -> Run All"
# is reproducible:
#   * NumPy            -> target-bandit sampling in BanditEnvironment.reset()
#   * Python `random`  -> shuffling of the hyperparameter grid
#   * PyTorch          -> weight initialisation and action sampling
# torch.manual_seed stays last so the cell output is unchanged.
SEED = 123
np.random.seed(SEED)
random.seed(SEED)
torch.manual_seed(SEED)


<torch._C.Generator at 0x144d6602a50>

## **Environment Class**

In [3]:
class BanditEnvironment:
    """Deterministic multi-armed bandit with a hidden target arm and an informative arm.

    With ``num_actions = 11`` the arms are:

    * arms 0-9: regular arms; every one pays 1 except the target arm, which pays 5;
    * arm 10 (the last index): informative arm; it pays ``(target_index + 1) / 10``,
      so its reward identifies the target arm.

    The observation returned by ``reset``/``step``/``get_state`` is the
    concatenation of a one-hot of the last action (``num_actions`` entries),
    a one-hot of the last reward (11 entries) and the current timestep (1 entry).
    """

    def __init__(self, max_steps = 100):
        self.num_actions = 11  # Regular arms plus the informative arm (last index)
        self.max_steps = max_steps  # Maximum number of steps per episode
        self.reset()


    def reset(self):
        """Start a new episode with a freshly drawn target arm and return the initial state."""
        informative_arm = self.num_actions - 1

        # Target arm for the episode (the one with the highest reward), drawn
        # among the regular arms only: indices 0 .. informative_arm - 1.
        self.target_bandit = np.random.randint(low = 0, high=informative_arm, size=None, dtype=int)
        # Every arm pays 1 by default ...
        self.bandits_reward = np.ones(self.num_actions, dtype=np.float32)
        # ... the target arm pays 5 ...
        self.bandits_reward[self.target_bandit] = 5
        # ... and the informative arm pays (1-based target number) / 10.
        self.bandits_reward[informative_arm] = np.round(np.float32((self.target_bandit + 1)/10),1)

        self.timestep = 0  # Initialize timestep
        self.last_action = None
        self.last_reward = 0
        return self.get_state()

    def step(self, action):
        """Pull arm ``action`` and return ``(next_state, reward, done)``.

        Raises:
            IndexError: if ``action`` is not in ``[0, num_actions - 1]``. The
                environment is left untouched in that case.
        """
        # Reject out-of-range arms up front: a negative index would otherwise wrap
        # around silently and yield a reward/observation pair that disagree.
        if not 0 <= action < self.num_actions:
            raise IndexError(f"action {action} is out of range [0, {self.num_actions - 1}]")

        self.timestep += 1
        self.last_action = action
        reward = self.bandits_reward[action] # Get reward
        self.last_reward = reward
        done = self.timestep >= self.max_steps  # Episode ends after max_steps steps
        return self.get_state(), reward, done

    def get_state(self):
        """Return the observation: one-hot last action, one-hot last reward, timestep."""
        informative_arm = self.num_actions - 1

        one_hot_action = np.zeros(self.num_actions)
        # Reward bins: indices 0-9 <-> rewards 0.1, 0.2, ..., 1.0; index 10 <-> reward 5.
        one_hot_reward = np.zeros(11)

        # Before the first step there is no action/reward: both one-hots stay all-zero.
        if self.last_action is not None:
            one_hot_action[self.last_action] = 1

            if self.last_action == self.target_bandit:
                # Target arm was pulled: reward 5.
                one_hot_reward[10] = 1
            elif self.last_action == informative_arm:
                # Informative arm was pulled: the reward bin equals the target index.
                one_hot_reward[self.target_bandit] = 1
            else:
                # Any other regular arm was pulled: reward 1.
                one_hot_reward[9] = 1

        return np.concatenate([one_hot_action, one_hot_reward, np.array([self.timestep], dtype=np.float32)])


In [32]:
## Example Environment
env = BanditEnvironment()
state = env.reset()
print("Bandit Initial State (action, r, ts):", state)
print("Target Bandit (index):", env.target_bandit)
print("Bandits Reward:", env.bandits_reward)

# Pull arm 3, then the target arm, then the informative arm (index 10), printing
# the reward and the resulting observation after every pull.
for action in (3, env.target_bandit, 10):
    print(f"Taking action {action} (index)....")
    next_state, reward, done = env.step(action)
    print("Reward:", reward)
    print("Next Bandit State:", next_state)


Bandit Initial State (action, r, ts): [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Target Bandit (index): 7
Bandits Reward: [1.  1.  1.  1.  1.  1.  1.  5.  1.  1.  0.8]
Taking action 3 (index)....
Reward: 1.0
Next Bandit State: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 1.]
Taking action 7 (index)....
Reward: 5.0
Next Bandit State: [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 2.]
Taking action 10 (index)....
Reward: 0.8
Next Bandit State: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 3.]


In [26]:
def metrics(total_reward, n_steps=None, best_reward=5):
    """Return the cumulative regret of one episode.

    Regret is the return of an oracle that collects ``best_reward`` at every
    step minus the return that was actually collected.

    Args:
        total_reward: Sum of the rewards collected during the episode.
        n_steps: Episode length. When omitted, the notebook-level ``max_steps``
            is used, which must be defined before the call.
        best_reward: Reward collected per step by the oracle (5 by default).

    Returns:
        ``best_reward * n_steps - total_reward``.
    """
    if n_steps is None:
        # Backward-compatible fallback to the global set in the experiment cell.
        n_steps = max_steps

    # Cumulative Regret
    optimal_reward = best_reward * n_steps
    return optimal_reward - total_reward

In [33]:
class A2C_LSTM_Agent(nn.Module):
    """Recurrent actor-critic network: one LSTM layer feeding an actor and a critic head."""

    def __init__(self, input_dim, hidden_size, num_actions):
        """Build the LSTM and the two linear heads.

        Args:
            input_dim: Size of one observation vector.
            hidden_size: Number of LSTM hidden units.
            num_actions: Number of discrete actions (size of the actor output).
        """
        super(A2C_LSTM_Agent, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_size, batch_first=True)
        self.actor_layer = nn.Linear(hidden_size, num_actions)
        self.critic_layer = nn.Linear(hidden_size, 1)
        # Much smaller gain on the actor than on the critic -> small initial actor weights.
        nn.init.xavier_uniform_(self.actor_layer.weight, gain=0.01)
        nn.init.xavier_uniform_(self.critic_layer.weight, gain=1.0)

    def forward(self, x, hidden_state):
        """Run the sequence through the LSTM and evaluate both heads on its last step.

        Args:
            x: Tensor of shape (batch_size, seq_len, input_dim).
            hidden_state: ``(h, c)`` tuple as returned by ``init_hidden_state``
                or by a previous call.

        Returns:
            ``(action_probs, value, hidden_state)``: softmax action probabilities
            of shape (batch_size, num_actions), state value of shape
            (batch_size, 1) and the updated ``(h, c)`` tuple.
        """
        # x shape: (batch_size, seq_len, input_dim)
        lstm_out, hidden_state = self.lstm(x, hidden_state)
        lstm_out = lstm_out[:, -1, :]  # Use the output of the last timestep
        action_probs = F.softmax(self.actor_layer(lstm_out), dim=-1)
        value = self.critic_layer(lstm_out)
        return action_probs, value, hidden_state

    def init_hidden_state(self, batch_size=1):
        """Return zero-initialised ``(h, c)`` LSTM states of shape (1, batch_size, hidden_size).

        The states are created on the same device and with the same dtype as the
        module's parameters, so they always match the model regardless of any
        notebook-level ``device`` variable.
        """
        reference = next(self.parameters())
        shape = (1, batch_size, self.hidden_size)
        return (torch.zeros(shape, device=reference.device, dtype=reference.dtype),
                torch.zeros(shape, device=reference.device, dtype=reference.dtype))

In [34]:
def train_agent(TRAIN = True, n_episodes = 25000, gamma = 0.99, learning_rate = 0.001, entropy_decay = False):
    """Run ``n_episodes`` episodes of the global ``agent`` on the global ``env``.

    With ``TRAIN=True`` a one-step advantage actor-critic update is applied
    after every environment step through the global ``optimizer``. With
    ``TRAIN=False`` actions are only sampled from the policy (no gradients, no
    updates) and the sequence of actions of every episode is recorded.

    Relies on the notebook-level names ``env``, ``agent``, ``optimizer``,
    ``device`` and ``metrics``.

    Args:
        TRAIN: True to update the agent, False to only evaluate it.
        n_episodes: Number of episodes to run.
        gamma: Discount factor of the TD target.
        learning_rate: Not used by this function (the step size is the one the
            global ``optimizer`` was created with); kept for call compatibility.
        entropy_decay: If True the entropy coefficient decays linearly with the
            episode number from 0.1 down to a floor of 0.005; otherwise it is
            fixed at 0.005.

    Returns:
        Dict of per-episode lists with keys 'Episode Returns', 'Optimal Action',
        'Bandits_Reward', 'Cumulative Regret', 'Actions-Chosen List' (``None``
        entries when training) and 'First Action'.
    """

    stats = {'Episode Returns': [], 'Optimal Action': [], 'Bandits_Reward': [], 'Cumulative Regret': [], 'Actions-Chosen List': [], 'First Action': []}

    for episode in tqdm(range(1, n_episodes + 1)):

        # Restart the environment, the LSTM state and the per-episode bookkeeping
        actions_chosen_list = None if TRAIN else []
        state = torch.tensor(env.reset(), dtype=torch.float32, device=device).unsqueeze(0).unsqueeze(0)  # Add batch and sequence dimensions
        hidden_state = agent.init_hidden_state()

        # Constant within an episode, so computed once instead of at every step
        entropy_coef = max(0.005, 0.1 * (1 - episode / n_episodes)) if entropy_decay else 0.005

        total_reward = 0
        first_action = None
        done = False

        while not done:

            # NOTE(review): the recurrent state is detached at every step, so no
            # gradient flows across timesteps (backprop is truncated to one step).
            detached_hidden = (hidden_state[0].detach(), hidden_state[1].detach())

            # Forward pass; autograd graphs are only built when they will be used
            with torch.set_grad_enabled(bool(TRAIN)):
                action_probs, value, hidden_state = agent(state, detached_hidden)

            # Sample action from probability distribution
            action_dist = torch.distributions.Categorical(action_probs)
            action = action_dist.sample()
            chosen_action = action.item()

            # Take action in the environment
            next_state, reward, done = env.step(chosen_action)
            # Plain Python float keeps the accumulated return's type independent
            # of NumPy's scalar-promotion rules.
            step_reward = float(reward)
            total_reward += step_reward

            # Save First Action
            if first_action is None:
                first_action = chosen_action

            # Prepare next state
            next_state = torch.tensor(next_state, dtype=torch.float32, device=device).unsqueeze(0).unsqueeze(0)

            # Training
            if TRAIN:
                log_prob = action_dist.log_prob(action)

                # Compute TD target and advantage
                with torch.no_grad():
                    if done:
                        # Terminal transition: there is no future return to
                        # bootstrap from, so the target is the reward alone.
                        next_value = torch.zeros_like(value)
                    else:
                        _, next_value, _ = agent(next_state, hidden_state)
                    td_target = step_reward + gamma * next_value
                advantage = td_target - value

                # Compute losses
                actor_loss = -log_prob * advantage.detach()
                critic_loss = F.mse_loss(value, td_target)
                entropy_loss = -action_dist.entropy().mean()

                loss = actor_loss + 0.05 * critic_loss + entropy_coef * entropy_loss

                # Optimize the network
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            else:
                actions_chosen_list.append(chosen_action)

            # Move to the next state
            state = next_state

        # Cumulative Regret:
        cumulative_regret_value = metrics(total_reward)

        # Save Episode stats
        stats['Episode Returns'].append(total_reward)
        stats['Optimal Action'].append(env.target_bandit)
        stats['Bandits_Reward'].append(env.bandits_reward)
        stats['Actions-Chosen List'].append(actions_chosen_list)
        stats['Cumulative Regret'].append(cumulative_regret_value)
        stats['First Action'].append(first_action)

        if episode % 100 == 0:
            # The slice also covers the "fewer than 100 episodes so far" case.
            mean_last_100_regret = np.mean(stats['Cumulative Regret'][-100:]).round(1)
            print(f"Ep {episode}/{n_episodes}, Opt. Action: {env.target_bandit}, Reward: {total_reward}, Cumulative-Regret: {cumulative_regret_value}, AVG100-Regret: {mean_last_100_regret}, First Action {first_action}")

    return stats



In [35]:
def _save_stats(stats, subfolder, model_name, discount_factor_gamma, learning_rate, entropy_decay, output_dir=None):
    """Tag ``stats`` with the run's hyperparameters and pickle it as ``<output_dir>/<subfolder>/<model_name>.pkl``."""
    if output_dir is None:
        # Backward-compatible default: the notebook-level output folder.
        output_dir = google_drive_folder

    frame = pd.DataFrame(stats)

    frame['model_name'] = model_name
    frame['entropy_decay'] = entropy_decay
    # Store the remaining hyperparameters as columns as well, so they do not
    # have to be parsed back out of the model name.
    frame['gamma'] = discount_factor_gamma
    frame['learning_rate'] = learning_rate

    target_dir = os.path.join(output_dir, subfolder)
    os.makedirs(target_dir, exist_ok=True)
    frame.to_pickle(os.path.join(target_dir, model_name + '.pkl'))


def save_train_stats(stats, model_name, discount_factor_gamma, learning_rate, entropy_decay, output_dir=None):
    """Save training statistics to ``<output_dir>/train_stats/<model_name>.pkl``.

    Args:
        stats: Dict of per-episode lists (or anything ``pd.DataFrame`` accepts).
        model_name: File name (without extension), also stored in a column.
        discount_factor_gamma: Discount factor of the run, stored as ``gamma``.
        learning_rate: Learning rate of the run, stored as ``learning_rate``.
        entropy_decay: Entropy-decay flag of the run, stored as ``entropy_decay``.
        output_dir: Root output folder; defaults to the global ``google_drive_folder``.
    """
    _save_stats(stats, 'train_stats', model_name, discount_factor_gamma, learning_rate, entropy_decay, output_dir)

def save_test_stats(stats, model_name, discount_factor_gamma, learning_rate, entropy_decay, output_dir=None):
    """Save evaluation statistics to ``<output_dir>/test_stats/<model_name>.pkl``.

    Takes the same arguments as ``save_train_stats``.
    """
    _save_stats(stats, 'test_stats', model_name, discount_factor_gamma, learning_rate, entropy_decay, output_dir)



In [36]:
# Output folder for checkpoints and statistics. The trailing slash matters:
# the paths below are built by plain string concatenation.
google_drive_folder = './Monografia/Exp3/'
for subfolder in ('models', 'train_stats', 'test_stats'):
    os.makedirs(google_drive_folder + subfolder, exist_ok=True)

# Hyperparameter grid
GAMMA_LIST = [0.8, 0.9, 1]
LR_LIST = [0.0001, 0.001]
ENTROPY_DECAY = [False, True]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

hidden_size = 48
max_steps = 5
n_episodes_train = 25000
n_episodes_test= 300


# Every (gamma, learning rate, entropy decay) combination, visited in random order
hyperparameters = list(itertools.product(GAMMA_LIST, LR_LIST, ENTROPY_DECAY))
random.shuffle(hyperparameters)

for gamma, learning_rate, entropy_decay in hyperparameters:

    print(f'\nGAMMA {gamma} - LR {learning_rate} - Entropy Decay {entropy_decay}')

    # Environment and agent setup (train_agent reads these notebook-level names)
    env = BanditEnvironment(max_steps = max_steps)
    num_actions = env.num_actions
    # Take the input size from an actual observation (past action one-hot, past
    # reward one-hot, timestep) rather than repeating the layout by hand.
    # get_state() does not touch the random generator.
    input_dim = env.get_state().shape[0]
    agent = A2C_LSTM_Agent(input_dim, hidden_size, num_actions).to(device)
    optimizer = optim.Adam(agent.parameters(), lr=learning_rate)

    # Train
    stats = train_agent(TRAIN = True, n_episodes = n_episodes_train, gamma = gamma, learning_rate = learning_rate, entropy_decay = entropy_decay)

    # Save Model and Train Stats
    agent.eval()
    model_name = f'LR_{str(learning_rate).replace(".","_")}__GAMMA_{str(gamma).replace(".","_")}__EntropyDecay_{str(entropy_decay)}'
    torch.save(agent.state_dict(), google_drive_folder + 'models/'+ model_name + '.pth')
    save_train_stats(stats, model_name, gamma, learning_rate, entropy_decay)

    print('\nTEST:')
    # Test and save stats (same hyperparameters passed for consistency; no update is performed)
    stats = train_agent(TRAIN = False, n_episodes = n_episodes_test, gamma = gamma, learning_rate = learning_rate, entropy_decay = entropy_decay)
    save_test_stats(stats, model_name, gamma, learning_rate, entropy_decay)




GAMMA 0.9 - LR 0.0001 - Entropy Decay True


  0%|          | 104/25000 [00:02<11:57, 34.71it/s]

Ep 100/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.0, First Action 8


  1%|          | 210/25000 [00:05<08:20, 49.50it/s]

Ep 200/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.3, First Action 10


  1%|          | 304/25000 [00:06<08:32, 48.21it/s]

Ep 300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  2%|▏         | 409/25000 [00:09<08:15, 49.61it/s]

Ep 400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


  2%|▏         | 508/25000 [00:11<08:24, 48.51it/s]

Ep 500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


  2%|▏         | 606/25000 [00:13<08:13, 49.41it/s]

Ep 600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


  3%|▎         | 703/25000 [00:15<11:06, 36.46it/s]

Ep 700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


  3%|▎         | 805/25000 [00:17<07:57, 50.63it/s]

Ep 800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


  4%|▎         | 905/25000 [00:19<08:12, 48.95it/s]

Ep 900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  4%|▍         | 1007/25000 [00:22<08:02, 49.73it/s]

Ep 1000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


  4%|▍         | 1110/25000 [00:24<07:55, 50.26it/s]

Ep 1100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


  5%|▍         | 1208/25000 [00:26<07:57, 49.79it/s]

Ep 1200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  5%|▌         | 1307/25000 [00:28<10:21, 38.15it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


  6%|▌         | 1409/25000 [00:31<07:54, 49.76it/s]

Ep 1400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


  6%|▌         | 1508/25000 [00:33<07:55, 49.36it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  6%|▋         | 1609/25000 [00:35<07:55, 49.21it/s]

Ep 1600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  7%|▋         | 1709/25000 [00:37<07:48, 49.69it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


  7%|▋         | 1808/25000 [00:39<07:50, 49.26it/s]

Ep 1800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 2


  8%|▊         | 1907/25000 [00:41<10:05, 38.15it/s]

Ep 1900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


  8%|▊         | 2005/25000 [00:44<08:01, 47.77it/s]

Ep 2000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  8%|▊         | 2106/25000 [00:46<07:36, 50.16it/s]

Ep 2100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 7


  9%|▉         | 2207/25000 [00:48<07:42, 49.33it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  9%|▉         | 2309/25000 [00:50<07:44, 48.89it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 8


 10%|▉         | 2404/25000 [00:52<07:23, 50.96it/s]

Ep 2400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 10%|█         | 2506/25000 [00:54<09:43, 38.57it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 10%|█         | 2609/25000 [00:57<08:48, 42.33it/s]

Ep 2600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 3


 11%|█         | 2707/25000 [00:59<07:23, 50.24it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 11%|█         | 2807/25000 [01:01<07:33, 48.95it/s]

Ep 2800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 12%|█▏        | 2908/25000 [01:03<07:21, 50.00it/s]

Ep 2900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 12%|█▏        | 3009/25000 [01:05<07:40, 47.77it/s]

Ep 3000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 12%|█▏        | 3105/25000 [01:07<09:05, 40.12it/s]

Ep 3100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 3


 13%|█▎        | 3205/25000 [01:10<10:08, 35.82it/s]

Ep 3200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 13%|█▎        | 3307/25000 [01:12<07:15, 49.86it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 14%|█▎        | 3405/25000 [01:14<07:14, 49.76it/s]

Ep 3400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 14%|█▍        | 3506/25000 [01:16<07:18, 48.99it/s]

Ep 3500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 14%|█▍        | 3608/25000 [01:18<07:22, 48.39it/s]

Ep 3600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 15%|█▍        | 3705/25000 [01:20<07:32, 47.07it/s]

Ep 3700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 15%|█▌        | 3807/25000 [01:22<09:48, 36.00it/s]

Ep 3800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 16%|█▌        | 3906/25000 [01:25<09:11, 38.25it/s]

Ep 3900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 7


 16%|█▌        | 4006/25000 [01:28<07:02, 49.72it/s]

Ep 4000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 16%|█▋        | 4106/25000 [01:30<06:56, 50.14it/s]

Ep 4100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 7


 17%|█▋        | 4206/25000 [01:32<07:01, 49.34it/s]

Ep 4200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 17%|█▋        | 4305/25000 [01:34<07:14, 47.61it/s]

Ep 4300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


 18%|█▊        | 4404/25000 [01:36<09:37, 35.66it/s]

Ep 4400/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 1


 18%|█▊        | 4507/25000 [01:39<07:01, 48.57it/s]

Ep 4500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 8


 18%|█▊        | 4609/25000 [01:41<07:08, 47.58it/s]

Ep 4600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 19%|█▉        | 4708/25000 [01:43<07:08, 47.34it/s]

Ep 4700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 19%|█▉        | 4807/25000 [01:45<07:02, 47.74it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 20%|█▉        | 4909/25000 [01:47<06:56, 48.27it/s]

Ep 4900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 3


 20%|██        | 5004/25000 [01:49<08:41, 38.36it/s]

Ep 5000/25000, Opt. Action: 2, Reward: 3.600000023841858, Cumulative-Regret: 21.399999976158142, AVG100-Regret: 17.8, First Action 0


 20%|██        | 5108/25000 [01:52<06:47, 48.81it/s]

Ep 5100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 21%|██        | 5206/25000 [01:54<06:59, 47.17it/s]

Ep 5200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 21%|██        | 5308/25000 [01:56<06:47, 48.35it/s]

Ep 5300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 22%|██▏       | 5408/25000 [01:58<06:46, 48.24it/s]

Ep 5400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 22%|██▏       | 5509/25000 [02:00<06:46, 47.95it/s]

Ep 5500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 22%|██▏       | 5605/25000 [02:02<08:20, 38.78it/s]

Ep 5600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 23%|██▎       | 5707/25000 [02:05<06:40, 48.20it/s]

Ep 5700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 23%|██▎       | 5808/25000 [02:07<06:31, 49.02it/s]

Ep 5800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 9


 24%|██▎       | 5908/25000 [02:09<06:28, 49.20it/s]

Ep 5900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 24%|██▍       | 6009/25000 [02:11<06:23, 49.48it/s]

Ep 6000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 24%|██▍       | 6109/25000 [02:13<06:25, 48.99it/s]

Ep 6100/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.3, First Action 3


 25%|██▍       | 6205/25000 [02:15<07:37, 41.05it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 25%|██▌       | 6306/25000 [02:18<07:03, 44.12it/s]

Ep 6300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 4


 26%|██▌       | 6408/25000 [02:20<06:19, 48.96it/s]

Ep 6400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 26%|██▌       | 6505/25000 [02:22<06:23, 48.24it/s]

Ep 6500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 26%|██▋       | 6605/25000 [02:24<06:35, 46.56it/s]

Ep 6600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 27%|██▋       | 6706/25000 [02:26<06:20, 48.05it/s]

Ep 6700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 27%|██▋       | 6805/25000 [02:28<07:41, 39.39it/s]

Ep 6800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 6


 28%|██▊       | 6907/25000 [02:31<07:18, 41.22it/s]

Ep 6900/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.1, First Action 1


 28%|██▊       | 7007/25000 [02:33<06:08, 48.84it/s]

Ep 7000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 28%|██▊       | 7106/25000 [02:35<05:55, 50.29it/s]

Ep 7100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 29%|██▉       | 7206/25000 [02:37<05:53, 50.41it/s]

Ep 7200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 29%|██▉       | 7306/25000 [02:39<05:55, 49.76it/s]

Ep 7300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 30%|██▉       | 7406/25000 [02:41<07:06, 41.24it/s]

Ep 7400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 30%|███       | 7506/25000 [02:44<08:30, 34.28it/s]

Ep 7500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 30%|███       | 7605/25000 [02:46<06:00, 48.28it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 31%|███       | 7707/25000 [02:48<05:54, 48.77it/s]

Ep 7700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 31%|███       | 7810/25000 [02:50<05:46, 49.66it/s]

Ep 7800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 32%|███▏      | 7909/25000 [02:52<05:48, 49.04it/s]

Ep 7900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


 32%|███▏      | 8003/25000 [02:54<06:02, 46.95it/s]

Ep 8000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 32%|███▏      | 8107/25000 [02:57<08:00, 35.18it/s]

Ep 8100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 33%|███▎      | 8208/25000 [02:59<05:34, 50.18it/s]

Ep 8200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 33%|███▎      | 8306/25000 [03:01<05:43, 48.56it/s]

Ep 8300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 34%|███▎      | 8407/25000 [03:03<05:40, 48.71it/s]

Ep 8400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 34%|███▍      | 8510/25000 [03:06<05:31, 49.78it/s]

Ep 8500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 7


 34%|███▍      | 8609/25000 [03:08<05:23, 50.68it/s]

Ep 8600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 6


 35%|███▍      | 8706/25000 [03:10<07:35, 35.79it/s]

Ep 8700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 35%|███▌      | 8810/25000 [03:12<05:28, 49.25it/s]

Ep 8800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 36%|███▌      | 8905/25000 [03:14<05:34, 48.17it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 36%|███▌      | 9009/25000 [03:17<05:21, 49.77it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 36%|███▋      | 9108/25000 [03:19<05:29, 48.29it/s]

Ep 9100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 37%|███▋      | 9206/25000 [03:21<05:16, 49.97it/s]

Ep 9200/25000, Opt. Action: 3, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 3


 37%|███▋      | 9305/25000 [03:23<06:55, 37.80it/s]

Ep 9300/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 9


 38%|███▊      | 9409/25000 [03:26<05:18, 48.94it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 38%|███▊      | 9506/25000 [03:28<05:07, 50.37it/s]

Ep 9500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 38%|███▊      | 9607/25000 [03:30<05:08, 49.95it/s]

Ep 9600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 39%|███▉      | 9707/25000 [03:32<05:06, 49.97it/s]

Ep 9700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 39%|███▉      | 9808/25000 [03:34<05:08, 49.23it/s]

Ep 9800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 40%|███▉      | 9906/25000 [03:36<06:28, 38.83it/s]

Ep 9900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 40%|████      | 10008/25000 [03:39<05:26, 45.99it/s]

Ep 10000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 40%|████      | 10106/25000 [03:41<05:00, 49.62it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 41%|████      | 10209/25000 [03:43<04:58, 49.53it/s]

Ep 10200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 9


 41%|████      | 10306/25000 [03:45<04:59, 49.14it/s]

Ep 10300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 42%|████▏     | 10408/25000 [03:47<04:54, 49.53it/s]

Ep 10400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 42%|████▏     | 10505/25000 [03:49<06:20, 38.08it/s]

Ep 10500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 42%|████▏     | 10607/25000 [03:52<05:59, 40.00it/s]

Ep 10600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 43%|████▎     | 10708/25000 [03:54<04:53, 48.77it/s]

Ep 10700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 7


 43%|████▎     | 10808/25000 [03:56<04:54, 48.22it/s]

Ep 10800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 44%|████▎     | 10909/25000 [03:58<04:48, 48.81it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 44%|████▍     | 11008/25000 [04:00<04:46, 48.82it/s]

Ep 11000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 44%|████▍     | 11104/25000 [04:02<05:45, 40.22it/s]

Ep 11100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 0


 45%|████▍     | 11206/25000 [04:05<06:22, 36.04it/s]

Ep 11200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 45%|████▌     | 11305/25000 [04:07<04:34, 49.95it/s]

Ep 11300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 46%|████▌     | 11405/25000 [04:09<04:44, 47.82it/s]

Ep 11400/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 46%|████▌     | 11507/25000 [04:11<04:38, 48.50it/s]

Ep 11500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 7


 46%|████▋     | 11605/25000 [04:13<04:35, 48.63it/s]

Ep 11600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 47%|████▋     | 11709/25000 [04:15<04:47, 46.27it/s]

Ep 11700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 47%|████▋     | 11804/25000 [04:18<06:10, 35.64it/s]

Ep 11800/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.9, First Action 5


 48%|████▊     | 11905/25000 [04:20<04:32, 48.11it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 48%|████▊     | 12007/25000 [04:22<04:26, 48.70it/s]

Ep 12000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 48%|████▊     | 12106/25000 [04:24<04:25, 48.64it/s]

Ep 12100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 49%|████▉     | 12206/25000 [04:26<04:23, 48.53it/s]

Ep 12200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 49%|████▉     | 12305/25000 [04:28<04:21, 48.62it/s]

Ep 12300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 50%|████▉     | 12404/25000 [04:31<05:45, 36.47it/s]

Ep 12400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 50%|█████     | 12506/25000 [04:33<04:09, 50.17it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 50%|█████     | 12607/25000 [04:35<04:10, 49.40it/s]

Ep 12600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 51%|█████     | 12710/25000 [04:37<04:07, 49.61it/s]

Ep 12700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 51%|█████     | 12805/25000 [04:39<04:07, 49.19it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 52%|█████▏    | 12909/25000 [04:41<03:59, 50.53it/s]

Ep 12900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 52%|█████▏    | 13007/25000 [04:44<05:07, 38.98it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 6


 52%|█████▏    | 13109/25000 [04:46<04:11, 47.23it/s]

Ep 13100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 6


 53%|█████▎    | 13206/25000 [04:48<04:02, 48.60it/s]

Ep 13200/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 4


 53%|█████▎    | 13308/25000 [04:50<04:01, 48.50it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 54%|█████▎    | 13410/25000 [04:52<03:59, 48.48it/s]

Ep 13400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 54%|█████▍    | 13508/25000 [04:54<03:48, 50.39it/s]

Ep 13500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 54%|█████▍    | 13606/25000 [04:57<05:11, 36.63it/s]

Ep 13600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 55%|█████▍    | 13705/25000 [04:59<04:00, 47.00it/s]

Ep 13700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 55%|█████▌    | 13809/25000 [05:02<03:48, 48.94it/s]

Ep 13800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 56%|█████▌    | 13905/25000 [05:04<03:42, 49.88it/s]

Ep 13900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 56%|█████▌    | 14006/25000 [05:06<03:44, 48.90it/s]

Ep 14000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 56%|█████▋    | 14108/25000 [05:08<03:42, 49.03it/s]

Ep 14100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 57%|█████▋    | 14205/25000 [05:10<04:37, 38.91it/s]

Ep 14200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 57%|█████▋    | 14306/25000 [05:13<04:03, 43.98it/s]

Ep 14300/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 6


 58%|█████▊    | 14410/25000 [05:15<03:29, 50.58it/s]

Ep 14400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 58%|█████▊    | 14507/25000 [05:17<03:34, 48.96it/s]

Ep 14500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 58%|█████▊    | 14609/25000 [05:19<03:37, 47.86it/s]

Ep 14600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 59%|█████▉    | 14709/25000 [05:21<03:35, 47.69it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 59%|█████▉    | 14805/25000 [05:23<04:26, 38.21it/s]

Ep 14800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 60%|█████▉    | 14904/25000 [05:26<05:00, 33.57it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 60%|██████    | 15009/25000 [05:28<03:28, 47.84it/s]

Ep 15000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 60%|██████    | 15108/25000 [05:30<03:26, 47.85it/s]

Ep 15100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 61%|██████    | 15206/25000 [05:32<03:19, 49.02it/s]

Ep 15200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 61%|██████    | 15307/25000 [05:34<03:23, 47.57it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 62%|██████▏   | 15403/25000 [05:36<03:48, 42.04it/s]

Ep 15400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 62%|██████▏   | 15506/25000 [05:39<04:27, 35.53it/s]

Ep 15500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 62%|██████▏   | 15605/25000 [05:41<03:14, 48.23it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 6


 63%|██████▎   | 15709/25000 [05:43<03:10, 48.68it/s]

Ep 15700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 63%|██████▎   | 15808/25000 [05:45<03:10, 48.21it/s]

Ep 15800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 64%|██████▎   | 15908/25000 [05:47<03:04, 49.34it/s]

Ep 15900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 64%|██████▍   | 16008/25000 [05:49<03:00, 49.79it/s]

Ep 16000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 64%|██████▍   | 16106/25000 [05:52<04:11, 35.30it/s]

Ep 16100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 65%|██████▍   | 16206/25000 [05:54<03:01, 48.32it/s]

Ep 16200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 2


 65%|██████▌   | 16307/25000 [05:56<03:10, 45.74it/s]

Ep 16300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 66%|██████▌   | 16407/25000 [05:59<03:34, 40.12it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 66%|██████▌   | 16506/25000 [06:01<03:08, 45.03it/s]

Ep 16500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 66%|██████▋   | 16604/25000 [06:03<03:32, 39.48it/s]

Ep 16600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 67%|██████▋   | 16706/25000 [06:06<03:58, 34.70it/s]

Ep 16700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 67%|██████▋   | 16808/25000 [06:08<02:45, 49.38it/s]

Ep 16800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 68%|██████▊   | 16905/25000 [06:10<02:41, 49.98it/s]

Ep 16900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 68%|██████▊   | 17008/25000 [06:12<02:39, 50.00it/s]

Ep 17000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 68%|██████▊   | 17106/25000 [06:14<02:39, 49.47it/s]

Ep 17100/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 7


 69%|██████▉   | 17203/25000 [06:16<02:59, 43.44it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 69%|██████▉   | 17307/25000 [06:19<03:38, 35.14it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 70%|██████▉   | 17405/25000 [06:21<02:29, 50.83it/s]

Ep 17400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 70%|███████   | 17507/25000 [06:23<02:32, 49.15it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 70%|███████   | 17607/25000 [06:25<02:30, 49.20it/s]

Ep 17600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 71%|███████   | 17706/25000 [06:27<02:28, 49.04it/s]

Ep 17700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 71%|███████   | 17809/25000 [06:30<02:21, 50.75it/s]

Ep 17800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 72%|███████▏  | 17906/25000 [06:32<03:19, 35.56it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


 72%|███████▏  | 18006/25000 [06:35<02:24, 48.38it/s]

Ep 18000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 72%|███████▏  | 18107/25000 [06:37<02:18, 49.77it/s]

Ep 18100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 73%|███████▎  | 18204/25000 [06:39<02:19, 48.73it/s]

Ep 18200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 6


 73%|███████▎  | 18305/25000 [06:41<02:16, 49.03it/s]

Ep 18300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 74%|███████▎  | 18408/25000 [06:43<02:17, 47.89it/s]

Ep 18400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 74%|███████▍  | 18506/25000 [06:45<02:57, 36.62it/s]

Ep 18500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


 74%|███████▍  | 18608/25000 [06:48<02:15, 47.07it/s]

Ep 18600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 75%|███████▍  | 18706/25000 [06:50<02:14, 46.96it/s]

Ep 18700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 75%|███████▌  | 18808/25000 [06:52<02:12, 46.74it/s]

Ep 18800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 76%|███████▌  | 18905/25000 [06:54<02:06, 48.10it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 76%|███████▌  | 19007/25000 [06:56<02:02, 48.74it/s]

Ep 19000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 76%|███████▋  | 19107/25000 [06:59<02:30, 39.16it/s]

Ep 19100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 77%|███████▋  | 19205/25000 [07:01<02:04, 46.43it/s]

Ep 19200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 77%|███████▋  | 19308/25000 [07:03<01:59, 47.66it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 78%|███████▊  | 19407/25000 [07:05<01:56, 48.07it/s]

Ep 19400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 78%|███████▊  | 19508/25000 [07:07<01:52, 49.00it/s]

Ep 19500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 78%|███████▊  | 19607/25000 [07:09<01:50, 48.66it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 79%|███████▉  | 19706/25000 [07:12<02:17, 38.44it/s]

Ep 19700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 79%|███████▉  | 19806/25000 [07:14<01:57, 44.11it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 80%|███████▉  | 19906/25000 [07:16<01:44, 48.83it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 80%|████████  | 20006/25000 [07:18<01:43, 48.20it/s]

Ep 20000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 80%|████████  | 20107/25000 [07:21<01:41, 48.17it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 81%|████████  | 20206/25000 [07:23<01:39, 47.96it/s]

Ep 20200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 81%|████████  | 20305/25000 [07:25<02:04, 37.86it/s]

Ep 20300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 82%|████████▏ | 20409/25000 [07:28<01:46, 43.23it/s]

Ep 20400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 82%|████████▏ | 20510/25000 [07:30<01:30, 49.72it/s]

Ep 20500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 82%|████████▏ | 20605/25000 [07:32<01:28, 49.83it/s]

Ep 20600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 6


 83%|████████▎ | 20704/25000 [07:34<01:27, 48.95it/s]

Ep 20700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 83%|████████▎ | 20809/25000 [07:36<01:27, 47.96it/s]

Ep 20800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 84%|████████▎ | 20906/25000 [07:38<01:47, 38.12it/s]

Ep 20900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 84%|████████▍ | 21004/25000 [07:41<01:54, 35.01it/s]

Ep 21000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 84%|████████▍ | 21106/25000 [07:43<01:21, 47.85it/s]

Ep 21100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 85%|████████▍ | 21205/25000 [07:45<01:19, 48.01it/s]

Ep 21200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 85%|████████▌ | 21308/25000 [07:47<01:17, 47.93it/s]

Ep 21300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 86%|████████▌ | 21407/25000 [07:49<01:13, 49.16it/s]

Ep 21400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 86%|████████▌ | 21506/25000 [07:51<01:24, 41.20it/s]

Ep 21500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


 86%|████████▋ | 21603/25000 [07:54<01:42, 33.24it/s]

Ep 21600/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 5


 87%|████████▋ | 21708/25000 [07:56<01:06, 49.76it/s]

Ep 21700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 87%|████████▋ | 21809/25000 [07:58<01:05, 48.68it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 88%|████████▊ | 21907/25000 [08:00<01:03, 48.99it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 7


 88%|████████▊ | 22010/25000 [08:02<01:01, 48.95it/s]

Ep 22000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 0


 88%|████████▊ | 22104/25000 [08:04<01:08, 42.01it/s]

Ep 22100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 89%|████████▉ | 22206/25000 [08:07<01:21, 34.42it/s]

Ep 22200/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 6


 89%|████████▉ | 22308/25000 [08:10<00:54, 48.95it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 90%|████████▉ | 22407/25000 [08:12<00:52, 49.50it/s]

Ep 22400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 90%|█████████ | 22510/25000 [08:14<00:49, 50.39it/s]

Ep 22500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 90%|█████████ | 22605/25000 [08:16<00:49, 48.63it/s]

Ep 22600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 91%|█████████ | 22707/25000 [08:18<00:46, 48.91it/s]

Ep 22700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 91%|█████████ | 22804/25000 [08:20<01:02, 35.03it/s]

Ep 22800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 92%|█████████▏| 22906/25000 [08:23<00:43, 48.37it/s]

Ep 22900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 92%|█████████▏| 23005/25000 [08:25<00:42, 47.33it/s]

Ep 23000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 92%|█████████▏| 23105/25000 [08:27<00:40, 47.30it/s]

Ep 23100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 93%|█████████▎| 23209/25000 [08:29<00:37, 47.90it/s]

Ep 23200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 93%|█████████▎| 23306/25000 [08:31<00:35, 47.80it/s]

Ep 23300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 94%|█████████▎| 23405/25000 [08:34<00:44, 35.64it/s]

Ep 23400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 94%|█████████▍| 23508/25000 [08:36<00:31, 47.79it/s]

Ep 23500/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.3, First Action 6


 94%|█████████▍| 23605/25000 [08:38<00:29, 47.65it/s]

Ep 23600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 95%|█████████▍| 23709/25000 [08:40<00:26, 47.86it/s]

Ep 23700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 95%|█████████▌| 23806/25000 [08:42<00:24, 48.48it/s]

Ep 23800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 96%|█████████▌| 23908/25000 [08:44<00:22, 48.83it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 96%|█████████▌| 24006/25000 [08:47<00:25, 38.76it/s]

Ep 24000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 96%|█████████▋| 24105/25000 [08:49<00:18, 47.89it/s]

Ep 24100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 97%|█████████▋| 24209/25000 [08:51<00:16, 49.19it/s]

Ep 24200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 97%|█████████▋| 24308/25000 [08:53<00:13, 49.61it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 98%|█████████▊| 24407/25000 [08:56<00:12, 48.49it/s]

Ep 24400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 4


 98%|█████████▊| 24505/25000 [08:58<00:09, 49.66it/s]

Ep 24500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 6


 98%|█████████▊| 24606/25000 [09:00<00:10, 37.43it/s]

Ep 24600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 99%|█████████▉| 24706/25000 [09:03<00:06, 48.18it/s]

Ep 24700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 6


 99%|█████████▉| 24807/25000 [09:05<00:03, 48.81it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


100%|█████████▉| 24908/25000 [09:07<00:01, 50.44it/s]

Ep 24900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


100%|██████████| 25000/25000 [09:09<00:00, 45.53it/s]


Ep 25000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 4

TEST:


 43%|████▎     | 129/300 [00:00<00:01, 152.60it/s]

Ep 100/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 76%|███████▌  | 227/300 [00:01<00:00, 155.07it/s]

Ep 200/300, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


100%|██████████| 300/300 [00:01<00:00, 154.08it/s]


Ep 300/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5

GAMMA 0.8 - LR 0.0001 - Entropy Decay True


  0%|          | 105/25000 [00:02<10:55, 37.97it/s]

Ep 100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  1%|          | 207/25000 [00:05<08:48, 46.91it/s]

Ep 200/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 18.5, First Action 2


  1%|          | 308/25000 [00:07<08:17, 49.66it/s]

Ep 300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


  2%|▏         | 405/25000 [00:09<08:24, 48.73it/s]

Ep 400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


  2%|▏         | 508/25000 [00:11<08:35, 47.55it/s]

Ep 500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  2%|▏         | 609/25000 [00:13<08:00, 50.74it/s]

Ep 600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


  3%|▎         | 705/25000 [00:15<10:37, 38.08it/s]

Ep 700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  3%|▎         | 809/25000 [00:18<08:24, 47.98it/s]

Ep 800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


  4%|▎         | 907/25000 [00:20<08:05, 49.59it/s]

Ep 900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


  4%|▍         | 1009/25000 [00:22<07:55, 50.41it/s]

Ep 1000/25000, Opt. Action: 5, Reward: 12.600000023841858, Cumulative-Regret: 12.399999976158142, AVG100-Regret: 17.8, First Action 10


  4%|▍         | 1108/25000 [00:24<08:08, 48.90it/s]

Ep 1100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


  5%|▍         | 1205/25000 [00:26<08:02, 49.34it/s]

Ep 1200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


  5%|▌         | 1307/25000 [00:28<10:18, 38.33it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


  6%|▌         | 1407/25000 [00:31<08:55, 44.04it/s]

Ep 1400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


  6%|▌         | 1507/25000 [00:33<07:57, 49.19it/s]

Ep 1500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


  6%|▋         | 1608/25000 [00:35<08:05, 48.19it/s]

Ep 1600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  7%|▋         | 1705/25000 [00:37<08:13, 47.25it/s]

Ep 1700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  7%|▋         | 1808/25000 [00:39<07:56, 48.67it/s]

Ep 1800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


  8%|▊         | 1908/25000 [00:42<09:43, 39.60it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


  8%|▊         | 2006/25000 [00:44<09:36, 39.86it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


  8%|▊         | 2107/25000 [00:47<08:04, 47.28it/s]

Ep 2100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 9


  9%|▉         | 2209/25000 [00:49<07:47, 48.74it/s]

Ep 2200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


  9%|▉         | 2306/25000 [00:51<07:54, 47.87it/s]

Ep 2300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 9


 10%|▉         | 2407/25000 [00:53<07:46, 48.41it/s]

Ep 2400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


 10%|█         | 2504/25000 [00:55<10:15, 36.53it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 10%|█         | 2604/25000 [00:58<10:46, 34.66it/s]

Ep 2600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 11%|█         | 2705/25000 [01:00<07:37, 48.69it/s]

Ep 2700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 11%|█         | 2808/25000 [01:02<07:37, 48.55it/s]

Ep 2800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 12%|█▏        | 2908/25000 [01:04<07:40, 47.94it/s]

Ep 2900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 12%|█▏        | 3006/25000 [01:06<07:33, 48.54it/s]

Ep 3000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 12%|█▏        | 3105/25000 [01:08<09:24, 38.77it/s]

Ep 3100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 13%|█▎        | 3206/25000 [01:11<10:43, 33.86it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 13%|█▎        | 3307/25000 [01:13<07:26, 48.62it/s]

Ep 3300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 14%|█▎        | 3407/25000 [01:15<07:33, 47.58it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.6, First Action 8


 14%|█▍        | 3505/25000 [01:18<09:15, 38.71it/s]

Ep 3500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 14%|█▍        | 3605/25000 [01:20<08:56, 39.88it/s]

Ep 3600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 15%|█▍        | 3705/25000 [01:23<09:18, 38.10it/s]

Ep 3700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 15%|█▌        | 3809/25000 [01:26<07:38, 46.21it/s]

Ep 3800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 16%|█▌        | 3906/25000 [01:28<07:11, 48.87it/s]

Ep 3900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 8


 16%|█▌        | 4010/25000 [01:30<07:11, 48.68it/s]

Ep 4000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 16%|█▋        | 4106/25000 [01:32<07:10, 48.55it/s]

Ep 4100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 17%|█▋        | 4208/25000 [01:34<07:13, 47.93it/s]

Ep 4200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 17%|█▋        | 4306/25000 [01:36<08:45, 39.40it/s]

Ep 4300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 18%|█▊        | 4405/25000 [01:39<08:01, 42.77it/s]

Ep 4400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 18%|█▊        | 4506/25000 [01:41<06:52, 49.67it/s]

Ep 4500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 18%|█▊        | 4608/25000 [01:43<06:50, 49.72it/s]

Ep 4600/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 18.0, First Action 9


 19%|█▉        | 4710/25000 [01:45<06:48, 49.68it/s]

Ep 4700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 19%|█▉        | 4808/25000 [01:47<06:50, 49.21it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 20%|█▉        | 4906/25000 [01:49<09:17, 36.03it/s]

Ep 4900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 2


 20%|██        | 5010/25000 [01:52<07:38, 43.62it/s]

Ep 5000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 20%|██        | 5108/25000 [01:54<06:46, 48.90it/s]

Ep 5100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 6


 21%|██        | 5208/25000 [01:56<06:41, 49.35it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 9


 21%|██        | 5305/25000 [01:58<06:34, 49.96it/s]

Ep 5300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 22%|██▏       | 5407/25000 [02:00<06:47, 48.10it/s]

Ep 5400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 22%|██▏       | 5504/25000 [02:02<08:25, 38.55it/s]

Ep 5500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 22%|██▏       | 5605/25000 [02:05<09:15, 34.94it/s]

Ep 5600/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 4


 23%|██▎       | 5707/25000 [02:07<06:42, 47.91it/s]

Ep 5700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 23%|██▎       | 5809/25000 [02:10<06:37, 48.24it/s]

Ep 5800/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 4


 24%|██▎       | 5909/25000 [02:12<06:34, 48.40it/s]

Ep 5900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 24%|██▍       | 6008/25000 [02:14<06:22, 49.62it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 24%|██▍       | 6104/25000 [02:16<06:47, 46.37it/s]

Ep 6100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 7


 25%|██▍       | 6204/25000 [02:18<09:10, 34.13it/s]

Ep 6200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 25%|██▌       | 6306/25000 [02:21<06:51, 45.41it/s]

Ep 6300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 26%|██▌       | 6407/25000 [02:23<06:41, 46.34it/s]

Ep 6400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 26%|██▌       | 6506/25000 [02:25<06:17, 49.05it/s]

Ep 6500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 26%|██▋       | 6607/25000 [02:27<06:14, 49.18it/s]

Ep 6600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 27%|██▋       | 6704/25000 [02:29<06:17, 48.47it/s]

Ep 6700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 27%|██▋       | 6804/25000 [02:32<09:13, 32.86it/s]

Ep 6800/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 28%|██▊       | 6906/25000 [02:34<06:34, 45.85it/s]

Ep 6900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 28%|██▊       | 7007/25000 [02:37<06:17, 47.62it/s]

Ep 7000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 28%|██▊       | 7107/25000 [02:39<06:06, 48.78it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 29%|██▉       | 7208/25000 [02:41<06:09, 48.14it/s]

Ep 7200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 29%|██▉       | 7308/25000 [02:43<06:08, 47.98it/s]

Ep 7300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 30%|██▉       | 7404/25000 [02:45<08:18, 35.29it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 30%|███       | 7507/25000 [02:48<05:57, 48.96it/s]

Ep 7500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 30%|███       | 7605/25000 [02:50<05:55, 48.98it/s]

Ep 7600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 31%|███       | 7705/25000 [02:52<05:56, 48.50it/s]

Ep 7700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 31%|███       | 7805/25000 [02:54<05:57, 48.16it/s]

Ep 7800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 32%|███▏      | 7906/25000 [02:56<06:06, 46.64it/s]

Ep 7900/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 3


 32%|███▏      | 8007/25000 [02:59<08:00, 35.36it/s]

Ep 8000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 32%|███▏      | 8105/25000 [03:01<06:01, 46.73it/s]

Ep 8100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 33%|███▎      | 8206/25000 [03:04<05:56, 47.06it/s]

Ep 8200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


 33%|███▎      | 8308/25000 [03:06<05:40, 49.05it/s]

Ep 8300/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 7


 34%|███▎      | 8405/25000 [03:08<05:40, 48.74it/s]

Ep 8400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 34%|███▍      | 8506/25000 [03:10<05:45, 47.69it/s]

Ep 8500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 34%|███▍      | 8607/25000 [03:13<07:51, 34.74it/s]

Ep 8600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 35%|███▍      | 8705/25000 [03:15<05:35, 48.56it/s]

Ep 8700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 35%|███▌      | 8806/25000 [03:17<05:37, 48.02it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 36%|███▌      | 8907/25000 [03:19<05:45, 46.52it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 1


 36%|███▌      | 9006/25000 [03:21<05:32, 48.04it/s]

Ep 9000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 36%|███▋      | 9109/25000 [03:23<05:36, 47.21it/s]

Ep 9100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 37%|███▋      | 9204/25000 [03:26<07:25, 35.49it/s]

Ep 9200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 37%|███▋      | 9308/25000 [03:28<05:30, 47.47it/s]

Ep 9300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 38%|███▊      | 9409/25000 [03:31<05:34, 46.55it/s]

Ep 9400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 4


 38%|███▊      | 9506/25000 [03:33<05:14, 49.33it/s]

Ep 9500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 38%|███▊      | 9607/25000 [03:35<05:13, 49.03it/s]

Ep 9600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 39%|███▉      | 9708/25000 [03:37<05:30, 46.24it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 39%|███▉      | 9806/25000 [03:39<07:02, 35.99it/s]

Ep 9800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


 40%|███▉      | 9906/25000 [03:42<05:08, 48.96it/s]

Ep 9900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 40%|████      | 10005/25000 [03:44<05:05, 49.05it/s]

Ep 10000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 40%|████      | 10107/25000 [03:46<05:00, 49.52it/s]

Ep 10100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 41%|████      | 10205/25000 [03:48<05:03, 48.75it/s]

Ep 10200/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 7


 41%|████      | 10305/25000 [03:50<05:02, 48.54it/s]

Ep 10300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 42%|████▏     | 10407/25000 [03:53<06:17, 38.64it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 3


 42%|████▏     | 10505/25000 [03:55<05:00, 48.32it/s]

Ep 10500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 42%|████▏     | 10606/25000 [03:57<04:56, 48.49it/s]

Ep 10600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 43%|████▎     | 10707/25000 [03:59<04:58, 47.87it/s]

Ep 10700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 43%|████▎     | 10808/25000 [04:01<04:55, 48.00it/s]

Ep 10800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 2


 44%|████▎     | 10909/25000 [04:04<04:51, 48.41it/s]

Ep 10900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 44%|████▍     | 11007/25000 [04:06<06:14, 37.40it/s]

Ep 11000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 9


 44%|████▍     | 11109/25000 [04:09<04:56, 46.84it/s]

Ep 11100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 45%|████▍     | 11206/25000 [04:11<04:47, 48.03it/s]

Ep 11200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 0


 45%|████▌     | 11306/25000 [04:13<04:41, 48.61it/s]

Ep 11300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 46%|████▌     | 11409/25000 [04:15<04:37, 49.03it/s]

Ep 11400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 46%|████▌     | 11506/25000 [04:17<04:37, 48.68it/s]

Ep 11500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 46%|████▋     | 11605/25000 [04:19<06:17, 35.51it/s]

Ep 11600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 47%|████▋     | 11707/25000 [04:22<04:38, 47.80it/s]

Ep 11700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 47%|████▋     | 11806/25000 [04:24<04:23, 49.98it/s]

Ep 11800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 48%|████▊     | 11907/25000 [04:26<04:31, 48.22it/s]

Ep 11900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 48%|████▊     | 12007/25000 [04:28<04:28, 48.33it/s]

Ep 12000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 48%|████▊     | 12105/25000 [04:30<04:20, 49.47it/s]

Ep 12100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 49%|████▉     | 12204/25000 [04:33<05:42, 37.32it/s]

Ep 12200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 49%|████▉     | 12309/25000 [04:35<04:50, 43.75it/s]

Ep 12300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 50%|████▉     | 12409/25000 [04:38<04:27, 47.04it/s]

Ep 12400/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 4


 50%|█████     | 12506/25000 [04:40<04:21, 47.70it/s]

Ep 12500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 50%|█████     | 12606/25000 [04:42<04:17, 48.08it/s]

Ep 12600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 8


 51%|█████     | 12706/25000 [04:44<04:15, 48.17it/s]

Ep 12700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 5


 51%|█████     | 12807/25000 [04:46<05:27, 37.20it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 3


 52%|█████▏    | 12906/25000 [04:49<04:54, 41.02it/s]

Ep 12900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 52%|█████▏    | 13010/25000 [04:51<04:02, 49.54it/s]

Ep 13000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 52%|█████▏    | 13107/25000 [04:53<04:02, 48.96it/s]

Ep 13100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 53%|█████▎    | 13209/25000 [04:55<04:04, 48.32it/s]

Ep 13200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 53%|█████▎    | 13306/25000 [04:57<04:01, 48.39it/s]

Ep 13300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 54%|█████▎    | 13405/25000 [04:59<05:12, 37.11it/s]

Ep 13400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 54%|█████▍    | 13508/25000 [05:02<05:05, 37.66it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 54%|█████▍    | 13605/25000 [05:04<03:53, 48.78it/s]

Ep 13600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 55%|█████▍    | 13705/25000 [05:06<03:54, 48.26it/s]

Ep 13700/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 1


 55%|█████▌    | 13806/25000 [05:08<03:57, 47.23it/s]

Ep 13800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 56%|█████▌    | 13907/25000 [05:11<03:53, 47.60it/s]

Ep 13900/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 56%|█████▌    | 14007/25000 [05:13<04:44, 38.70it/s]

Ep 14000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 56%|█████▋    | 14104/25000 [05:16<05:21, 33.86it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 57%|█████▋    | 14208/25000 [05:18<03:42, 48.53it/s]

Ep 14200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 57%|█████▋    | 14305/25000 [05:20<03:50, 46.36it/s]

Ep 14300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 1


 58%|█████▊    | 14408/25000 [05:22<03:37, 48.72it/s]

Ep 14400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 58%|█████▊    | 14509/25000 [05:24<03:36, 48.54it/s]

Ep 14500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 58%|█████▊    | 14604/25000 [05:26<04:30, 38.41it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 8


 59%|█████▉    | 14705/25000 [05:29<04:55, 34.83it/s]

Ep 14700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 59%|█████▉    | 14808/25000 [05:31<03:35, 47.20it/s]

Ep 14800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 60%|█████▉    | 14905/25000 [05:33<03:26, 48.89it/s]

Ep 14900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 60%|██████    | 15006/25000 [05:35<03:29, 47.67it/s]

Ep 15000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 60%|██████    | 15106/25000 [05:37<03:32, 46.61it/s]

Ep 15100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 1


 61%|██████    | 15203/25000 [05:40<04:08, 39.47it/s]

Ep 15200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 61%|██████    | 15303/25000 [05:42<04:40, 34.62it/s]

Ep 15300/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 1


 62%|██████▏   | 15405/25000 [05:45<03:22, 47.33it/s]

Ep 15400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 62%|██████▏   | 15509/25000 [05:47<03:15, 48.61it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 62%|██████▏   | 15609/25000 [05:49<03:14, 48.31it/s]

Ep 15600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 63%|██████▎   | 15709/25000 [05:51<03:14, 47.78it/s]

Ep 15700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 63%|██████▎   | 15804/25000 [05:54<04:38, 33.07it/s]

Ep 15800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 64%|██████▎   | 15906/25000 [05:56<04:33, 33.24it/s]

Ep 15900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 64%|██████▍   | 16005/25000 [05:59<03:33, 42.17it/s]

Ep 16000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 64%|██████▍   | 16105/25000 [06:01<03:09, 46.87it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 65%|██████▍   | 16206/25000 [06:04<03:01, 48.48it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 65%|██████▌   | 16307/25000 [06:06<03:06, 46.49it/s]

Ep 16300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 66%|██████▌   | 16409/25000 [06:08<02:54, 49.13it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 66%|██████▌   | 16505/25000 [06:10<03:44, 37.84it/s]

Ep 16500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 6


 66%|██████▋   | 16609/25000 [06:13<03:06, 45.09it/s]

Ep 16600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 67%|██████▋   | 16709/25000 [06:15<02:51, 48.39it/s]

Ep 16700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 67%|██████▋   | 16806/25000 [06:17<02:50, 48.00it/s]

Ep 16800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 5


 68%|██████▊   | 16907/25000 [06:19<02:54, 46.50it/s]

Ep 16900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 68%|██████▊   | 17005/25000 [06:21<02:50, 47.03it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 68%|██████▊   | 17105/25000 [06:24<03:25, 38.39it/s]

Ep 17100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 8


 69%|██████▉   | 17209/25000 [06:27<02:53, 44.98it/s]

Ep 17200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 69%|██████▉   | 17306/25000 [06:29<02:43, 47.13it/s]

Ep 17300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 70%|██████▉   | 17408/25000 [06:31<02:35, 48.88it/s]

Ep 17400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 70%|███████   | 17506/25000 [06:33<02:33, 48.70it/s]

Ep 17500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 70%|███████   | 17609/25000 [06:35<02:35, 47.53it/s]

Ep 17600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 71%|███████   | 17705/25000 [06:37<03:16, 37.17it/s]

Ep 17700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 71%|███████   | 17809/25000 [06:40<02:42, 44.35it/s]

Ep 17800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 72%|███████▏  | 17905/25000 [06:42<02:27, 48.10it/s]

Ep 17900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 72%|███████▏  | 18007/25000 [06:44<02:24, 48.30it/s]

Ep 18000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 72%|███████▏  | 18109/25000 [06:46<02:26, 47.09it/s]

Ep 18100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 1


 73%|███████▎  | 18206/25000 [06:48<02:21, 47.92it/s]

Ep 18200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 0


 73%|███████▎  | 18306/25000 [06:51<03:03, 36.48it/s]

Ep 18300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 74%|███████▎  | 18408/25000 [06:53<02:30, 43.78it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 9


 74%|███████▍  | 18505/25000 [06:55<02:14, 48.47it/s]

Ep 18500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 74%|███████▍  | 18607/25000 [06:58<02:11, 48.66it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 75%|███████▍  | 18706/25000 [07:00<02:08, 49.06it/s]

Ep 18700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 75%|███████▌  | 18807/25000 [07:02<02:07, 48.63it/s]

Ep 18800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 76%|███████▌  | 18907/25000 [07:04<02:39, 38.14it/s]

Ep 18900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 76%|███████▌  | 19006/25000 [07:07<02:28, 40.34it/s]

Ep 19000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 76%|███████▋  | 19107/25000 [07:09<02:01, 48.54it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 77%|███████▋  | 19209/25000 [07:11<01:59, 48.61it/s]

Ep 19200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


 77%|███████▋  | 19306/25000 [07:13<01:55, 49.39it/s]

Ep 19300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 0


 78%|███████▊  | 19408/25000 [07:15<01:55, 48.37it/s]

Ep 19400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 78%|███████▊  | 19506/25000 [07:18<02:30, 36.41it/s]

Ep 19500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 78%|███████▊  | 19607/25000 [07:20<02:31, 35.62it/s]

Ep 19600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 79%|███████▉  | 19708/25000 [07:22<01:50, 48.08it/s]

Ep 19700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 79%|███████▉  | 19808/25000 [07:24<01:49, 47.28it/s]

Ep 19800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 80%|███████▉  | 19910/25000 [07:27<01:44, 48.72it/s]

Ep 19900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 80%|████████  | 20005/25000 [07:29<01:44, 47.79it/s]

Ep 20000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 80%|████████  | 20106/25000 [07:31<02:06, 38.59it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 81%|████████  | 20207/25000 [07:34<02:22, 33.69it/s]

Ep 20200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 81%|████████  | 20307/25000 [07:36<01:37, 48.13it/s]

Ep 20300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 82%|████████▏ | 20407/25000 [07:38<01:35, 48.29it/s]

Ep 20400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 82%|████████▏ | 20508/25000 [07:40<01:31, 49.04it/s]

Ep 20500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 82%|████████▏ | 20608/25000 [07:42<01:31, 48.18it/s]

Ep 20600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 83%|████████▎ | 20706/25000 [07:44<01:52, 38.09it/s]

Ep 20700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 83%|████████▎ | 20805/25000 [07:47<02:01, 34.54it/s]

Ep 20800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 84%|████████▎ | 20907/25000 [07:49<01:24, 48.54it/s]

Ep 20900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 8


 84%|████████▍ | 21008/25000 [07:51<01:25, 46.94it/s]

Ep 21000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 84%|████████▍ | 21108/25000 [07:54<01:21, 47.89it/s]

Ep 21100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 85%|████████▍ | 21209/25000 [07:56<01:18, 48.03it/s]

Ep 21200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 85%|████████▌ | 21304/25000 [07:58<01:28, 41.77it/s]

Ep 21300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 2


 86%|████████▌ | 21406/25000 [08:01<01:46, 33.59it/s]

Ep 21400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 86%|████████▌ | 21508/25000 [08:03<01:11, 48.79it/s]

Ep 21500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 86%|████████▋ | 21605/25000 [08:05<01:09, 48.56it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 1


 87%|████████▋ | 21706/25000 [08:07<01:08, 47.98it/s]

Ep 21700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 87%|████████▋ | 21806/25000 [08:09<01:05, 48.50it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 88%|████████▊ | 21903/25000 [08:11<01:07, 45.70it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 88%|████████▊ | 22004/25000 [08:14<01:27, 34.41it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 88%|████████▊ | 22108/25000 [08:16<00:59, 48.99it/s]

Ep 22100/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 89%|████████▉ | 22208/25000 [08:18<00:59, 47.08it/s]

Ep 22200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 89%|████████▉ | 22305/25000 [08:20<00:56, 47.32it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 90%|████████▉ | 22407/25000 [08:23<00:55, 46.89it/s]

Ep 22400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 90%|█████████ | 22505/25000 [08:25<00:52, 47.82it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 90%|█████████ | 22605/25000 [08:27<01:05, 36.30it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 91%|█████████ | 22707/25000 [08:30<00:48, 47.22it/s]

Ep 22700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 91%|█████████ | 22807/25000 [08:32<00:46, 47.45it/s]

Ep 22800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 6


 92%|█████████▏| 22907/25000 [08:34<00:43, 47.66it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 1


 92%|█████████▏| 23009/25000 [08:36<00:42, 46.96it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 92%|█████████▏| 23105/25000 [08:38<00:39, 47.90it/s]

Ep 23100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 93%|█████████▎| 23204/25000 [08:41<00:49, 36.60it/s]

Ep 23200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 93%|█████████▎| 23309/25000 [08:43<00:35, 47.65it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 94%|█████████▎| 23405/25000 [08:45<00:33, 47.65it/s]

Ep 23400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 94%|█████████▍| 23509/25000 [08:47<00:32, 46.27it/s]

Ep 23500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 94%|█████████▍| 23606/25000 [08:49<00:29, 47.88it/s]

Ep 23600/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 95%|█████████▍| 23706/25000 [08:52<00:26, 49.14it/s]

Ep 23700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 95%|█████████▌| 23803/25000 [08:54<00:33, 36.02it/s]

Ep 23800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 96%|█████████▌| 23906/25000 [08:57<00:22, 47.62it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 96%|█████████▌| 24006/25000 [08:59<00:21, 46.62it/s]

Ep 24000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 96%|█████████▋| 24106/25000 [09:01<00:19, 46.30it/s]

Ep 24100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 97%|█████████▋| 24207/25000 [09:03<00:16, 47.34it/s]

Ep 24200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 97%|█████████▋| 24307/25000 [09:05<00:16, 42.37it/s]

Ep 24300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 98%|█████████▊| 24404/25000 [09:08<00:17, 34.37it/s]

Ep 24400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 98%|█████████▊| 24508/25000 [09:11<00:10, 46.74it/s]

Ep 24500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 98%|█████████▊| 24605/25000 [09:13<00:08, 45.63it/s]

Ep 24600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 99%|█████████▉| 24705/25000 [09:15<00:06, 47.29it/s]

Ep 24700/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 6


 99%|█████████▉| 24805/25000 [09:17<00:04, 47.41it/s]

Ep 24800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


100%|█████████▉| 24906/25000 [09:19<00:02, 46.60it/s]

Ep 24900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 3


100%|██████████| 25000/25000 [09:22<00:00, 44.46it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3

TEST:


 43%|████▎     | 129/300 [00:00<00:01, 148.69it/s]

Ep 100/300, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 75%|███████▍  | 224/300 [00:01<00:00, 149.36it/s]

Ep 200/300, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 5


100%|██████████| 300/300 [00:02<00:00, 149.69it/s]


Ep 300/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4

GAMMA 0.8 - LR 0.001 - Entropy Decay False


  0%|          | 107/25000 [00:02<08:40, 47.86it/s]

Ep 100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


  1%|          | 207/25000 [00:04<08:41, 47.54it/s]

Ep 200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


  1%|          | 305/25000 [00:06<10:48, 38.10it/s]

Ep 300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


  2%|▏         | 409/25000 [00:09<08:51, 46.29it/s]

Ep 400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


  2%|▏         | 509/25000 [00:11<08:36, 47.42it/s]

Ep 500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


  2%|▏         | 605/25000 [00:13<08:23, 48.40it/s]

Ep 600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  3%|▎         | 707/25000 [00:15<08:26, 47.95it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


  3%|▎         | 807/25000 [00:18<08:45, 46.06it/s]

Ep 800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


  4%|▎         | 904/25000 [00:20<11:07, 36.08it/s]

Ep 900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  4%|▍         | 1005/25000 [00:23<08:59, 44.52it/s]

Ep 1000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  4%|▍         | 1107/25000 [00:25<08:54, 44.69it/s]

Ep 1100/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.1, First Action 4


  5%|▍         | 1207/25000 [00:27<08:32, 46.44it/s]

Ep 1200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  5%|▌         | 1309/25000 [00:29<08:10, 48.26it/s]

Ep 1300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 4


  6%|▌         | 1406/25000 [00:31<08:20, 47.18it/s]

Ep 1400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


  6%|▌         | 1506/25000 [00:34<11:04, 35.34it/s]

Ep 1500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


  6%|▋         | 1608/25000 [00:36<08:45, 44.48it/s]

Ep 1600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


  7%|▋         | 1704/25000 [00:38<08:13, 47.16it/s]

Ep 1700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  7%|▋         | 1804/25000 [00:41<08:38, 44.76it/s]

Ep 1800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


  8%|▊         | 1909/25000 [00:43<08:12, 46.92it/s]

Ep 1900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


  8%|▊         | 2005/25000 [00:45<08:12, 46.65it/s]

Ep 2000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


  8%|▊         | 2107/25000 [00:47<09:44, 39.15it/s]

Ep 2100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


  9%|▉         | 2206/25000 [00:50<08:49, 43.01it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 7


  9%|▉         | 2306/25000 [00:52<07:57, 47.56it/s]

Ep 2300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 4


 10%|▉         | 2406/25000 [00:54<07:56, 47.42it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 10%|█         | 2503/25000 [00:56<10:02, 37.31it/s]

Ep 2500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 4


 10%|█         | 2605/25000 [00:59<10:50, 34.40it/s]

Ep 2600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 4


 11%|█         | 2705/25000 [01:02<10:34, 35.16it/s]

Ep 2700/25000, Opt. Action: 4, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.8, First Action 4


 11%|█         | 2809/25000 [01:05<08:06, 45.59it/s]

Ep 2800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


 12%|█▏        | 2907/25000 [01:07<08:08, 45.27it/s]

Ep 2900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 6


 12%|█▏        | 3007/25000 [01:09<07:51, 46.65it/s]

Ep 3000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 4


 12%|█▏        | 3108/25000 [01:11<07:31, 48.44it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.6, First Action 4


 13%|█▎        | 3205/25000 [01:13<07:32, 48.12it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 4


 13%|█▎        | 3304/25000 [01:16<09:58, 36.27it/s]

Ep 3300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 4


 14%|█▎        | 3409/25000 [01:18<07:23, 48.70it/s]

Ep 3400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 4


 14%|█▍        | 3509/25000 [01:20<07:29, 47.79it/s]

Ep 3500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 14%|█▍        | 3609/25000 [01:22<07:28, 47.72it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 9


 15%|█▍        | 3708/25000 [01:24<07:35, 46.73it/s]

Ep 3700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 4


 15%|█▌        | 3809/25000 [01:27<07:31, 46.98it/s]

Ep 3800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.4, First Action 4


 16%|█▌        | 3906/25000 [01:29<09:58, 35.26it/s]

Ep 3900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 16%|█▌        | 4006/25000 [01:32<07:35, 46.12it/s]

Ep 4000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.1, First Action 4


 16%|█▋        | 4106/25000 [01:34<07:38, 45.58it/s]

Ep 4100/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.6, First Action 4


 17%|█▋        | 4206/25000 [01:36<07:14, 47.88it/s]

Ep 4200/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.7, First Action 4


 17%|█▋        | 4306/25000 [01:38<07:14, 47.58it/s]

Ep 4300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 18%|█▊        | 4406/25000 [01:40<07:18, 46.93it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 4


 18%|█▊        | 4505/25000 [01:43<09:49, 34.77it/s]

Ep 4500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.3, First Action 4


 18%|█▊        | 4606/25000 [01:46<07:20, 46.30it/s]

Ep 4600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 4


 19%|█▉        | 4706/25000 [01:48<07:18, 46.29it/s]

Ep 4700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 4


 19%|█▉        | 4806/25000 [01:50<06:56, 48.54it/s]

Ep 4800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 4


 20%|█▉        | 4906/25000 [01:52<07:10, 46.64it/s]

Ep 4900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 20%|██        | 5006/25000 [01:54<07:00, 47.52it/s]

Ep 5000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.0, First Action 4


 20%|██        | 5105/25000 [01:57<09:39, 34.32it/s]

Ep 5100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 21%|██        | 5207/25000 [01:59<07:02, 46.81it/s]

Ep 5200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 21%|██        | 5307/25000 [02:01<07:06, 46.20it/s]

Ep 5300/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.9, First Action 4


 22%|██▏       | 5407/25000 [02:04<07:09, 45.61it/s]

Ep 5400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 22%|██▏       | 5505/25000 [02:06<06:59, 46.52it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 4


 22%|██▏       | 5606/25000 [02:08<07:24, 43.62it/s]

Ep 5600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 23%|██▎       | 5705/25000 [02:11<09:31, 33.79it/s]

Ep 5700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 23%|██▎       | 5808/25000 [02:13<06:49, 46.84it/s]

Ep 5800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 4


 24%|██▎       | 5908/25000 [02:15<06:47, 46.84it/s]

Ep 5900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 24%|██▍       | 6008/25000 [02:17<06:49, 46.32it/s]

Ep 6000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 24%|██▍       | 6108/25000 [02:19<06:35, 47.77it/s]

Ep 6100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.4, First Action 4


 25%|██▍       | 6203/25000 [02:22<07:35, 41.31it/s]

Ep 6200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 25%|██▌       | 6305/25000 [02:24<09:30, 32.80it/s]

Ep 6300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 4


 26%|██▌       | 6408/25000 [02:27<06:35, 46.95it/s]

Ep 6400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 4


 26%|██▌       | 6509/25000 [02:29<06:22, 48.37it/s]

Ep 6500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.9, First Action 4


 26%|██▋       | 6609/25000 [02:31<06:32, 46.82it/s]

Ep 6600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.3, First Action 4


 27%|██▋       | 6709/25000 [02:33<06:35, 46.30it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.7, First Action 4


 27%|██▋       | 6804/25000 [02:35<07:29, 40.48it/s]

Ep 6800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.9, First Action 4


 28%|██▊       | 6906/25000 [02:38<08:48, 34.25it/s]

Ep 6900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 28%|██▊       | 7009/25000 [02:41<06:38, 45.20it/s]

Ep 7000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 28%|██▊       | 7109/25000 [02:43<06:18, 47.25it/s]

Ep 7100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 4


 29%|██▉       | 7209/25000 [02:45<06:20, 46.72it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.6, First Action 4


 29%|██▉       | 7309/25000 [02:47<06:15, 47.11it/s]

Ep 7300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 4


 30%|██▉       | 7404/25000 [02:49<06:38, 44.12it/s]

Ep 7400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 30%|███       | 7504/25000 [02:52<08:26, 34.55it/s]

Ep 7500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.3, First Action 4


 30%|███       | 7608/25000 [02:54<06:00, 48.27it/s]

Ep 7600/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 4


 31%|███       | 7708/25000 [02:57<06:26, 44.73it/s]

Ep 7700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 31%|███       | 7809/25000 [02:59<06:02, 47.43it/s]

Ep 7800/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.8, First Action 4


 32%|███▏      | 7909/25000 [03:01<06:03, 47.02it/s]

Ep 7900/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 4


 32%|███▏      | 8006/25000 [03:03<06:03, 46.78it/s]

Ep 8000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 4


 32%|███▏      | 8106/25000 [03:06<08:09, 34.53it/s]

Ep 8100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 4


 33%|███▎      | 8209/25000 [03:08<05:49, 48.07it/s]

Ep 8200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 33%|███▎      | 8305/25000 [03:10<05:51, 47.53it/s]

Ep 8300/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 4


 34%|███▎      | 8405/25000 [03:12<05:43, 48.34it/s]

Ep 8400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.7, First Action 4


 34%|███▍      | 8505/25000 [03:14<05:47, 47.50it/s]

Ep 8500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 34%|███▍      | 8604/25000 [03:16<06:12, 44.02it/s]

Ep 8600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 35%|███▍      | 8706/25000 [03:19<07:53, 34.38it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 35%|███▌      | 8805/25000 [03:22<05:42, 47.27it/s]

Ep 8800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.3, First Action 4


 36%|███▌      | 8905/25000 [03:24<05:36, 47.89it/s]

Ep 8900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 4


 36%|███▌      | 9005/25000 [03:26<05:55, 44.99it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 36%|███▋      | 9105/25000 [03:28<05:44, 46.09it/s]

Ep 9100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 37%|███▋      | 9205/25000 [03:30<06:33, 40.10it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 37%|███▋      | 9303/25000 [03:33<07:35, 34.47it/s]

Ep 9300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 38%|███▊      | 9406/25000 [03:36<05:44, 45.23it/s]

Ep 9400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 4


 38%|███▊      | 9506/25000 [03:38<05:43, 45.14it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 38%|███▊      | 9606/25000 [03:40<05:32, 46.36it/s]

Ep 9600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 39%|███▉      | 9706/25000 [03:42<05:26, 46.83it/s]

Ep 9700/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.0, First Action 4


 39%|███▉      | 9806/25000 [03:44<05:59, 42.24it/s]

Ep 9800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 40%|███▉      | 9904/25000 [03:47<07:17, 34.48it/s]

Ep 9900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 4


 40%|████      | 10005/25000 [03:49<05:24, 46.24it/s]

Ep 10000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 4


 40%|████      | 10105/25000 [03:51<05:14, 47.33it/s]

Ep 10100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 41%|████      | 10205/25000 [03:53<05:21, 45.97it/s]

Ep 10200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 41%|████      | 10305/25000 [03:56<05:15, 46.57it/s]

Ep 10300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 4


 42%|████▏     | 10405/25000 [03:58<05:06, 47.60it/s]

Ep 10400/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 12.6, First Action 4


 42%|████▏     | 10505/25000 [04:00<06:56, 34.80it/s]

Ep 10500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 42%|████▏     | 10606/25000 [04:03<05:06, 46.97it/s]

Ep 10600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 4


 43%|████▎     | 10706/25000 [04:05<05:03, 47.12it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 4


 43%|████▎     | 10806/25000 [04:07<05:24, 43.72it/s]

Ep 10800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 44%|████▎     | 10906/25000 [04:09<05:03, 46.36it/s]

Ep 10900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 44%|████▍     | 11006/25000 [04:12<05:03, 46.13it/s]

Ep 11000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 4


 44%|████▍     | 11106/25000 [04:14<06:45, 34.24it/s]

Ep 11100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.2, First Action 4


 45%|████▍     | 11207/25000 [04:17<04:49, 47.69it/s]

Ep 11200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.3, First Action 4


 45%|████▌     | 11307/25000 [04:19<04:48, 47.43it/s]

Ep 11300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.0, First Action 4


 46%|████▌     | 11407/25000 [04:21<04:43, 47.89it/s]

Ep 11400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 4


 46%|████▌     | 11507/25000 [04:23<04:52, 46.16it/s]

Ep 11500/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 4


 46%|████▋     | 11607/25000 [04:25<04:39, 47.88it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 47%|████▋     | 11703/25000 [04:28<06:11, 35.80it/s]

Ep 11700/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.4, First Action 4


 47%|████▋     | 11804/25000 [04:30<04:44, 46.39it/s]

Ep 11800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 48%|████▊     | 11909/25000 [04:33<04:45, 45.91it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 48%|████▊     | 12009/25000 [04:35<04:36, 46.91it/s]

Ep 12000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.9, First Action 4


 48%|████▊     | 12105/25000 [04:37<04:33, 47.15it/s]

Ep 12100/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.4, First Action 4


 49%|████▉     | 12206/25000 [04:39<04:32, 46.90it/s]

Ep 12200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 49%|████▉     | 12306/25000 [04:42<06:14, 33.89it/s]

Ep 12300/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 4


 50%|████▉     | 12406/25000 [04:44<04:33, 46.05it/s]

Ep 12400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 50%|█████     | 12506/25000 [04:46<04:31, 45.99it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.7, First Action 4


 50%|█████     | 12607/25000 [04:48<04:21, 47.34it/s]

Ep 12600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 51%|█████     | 12707/25000 [04:51<04:26, 46.10it/s]

Ep 12700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 51%|█████     | 12807/25000 [04:53<04:20, 46.72it/s]

Ep 12800/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 4


 52%|█████▏    | 12903/25000 [04:55<06:16, 32.16it/s]

Ep 12900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 52%|█████▏    | 13005/25000 [04:58<04:15, 47.00it/s]

Ep 13000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 4


 52%|█████▏    | 13107/25000 [05:00<04:08, 47.80it/s]

Ep 13100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 4


 53%|█████▎    | 13207/25000 [05:02<04:06, 47.75it/s]

Ep 13200/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 4


 53%|█████▎    | 13306/25000 [05:04<04:06, 47.39it/s]

Ep 13300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 54%|█████▎    | 13406/25000 [05:06<04:18, 44.82it/s]

Ep 13400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 4


 54%|█████▍    | 13504/25000 [05:09<05:44, 33.40it/s]

Ep 13500/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 54%|█████▍    | 13609/25000 [05:12<04:01, 47.24it/s]

Ep 13600/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.7, First Action 4


 55%|█████▍    | 13709/25000 [05:14<04:00, 47.01it/s]

Ep 13700/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.3, First Action 4


 55%|█████▌    | 13809/25000 [05:16<04:04, 45.76it/s]

Ep 13800/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.0, First Action 4


 56%|█████▌    | 13909/25000 [05:18<03:57, 46.70it/s]

Ep 13900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 56%|█████▌    | 14006/25000 [05:21<05:04, 36.11it/s]

Ep 14000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 4


 56%|█████▋    | 14103/25000 [05:23<05:37, 32.32it/s]

Ep 14100/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 57%|█████▋    | 14203/25000 [05:26<05:11, 34.66it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.1, First Action 4


 57%|█████▋    | 14305/25000 [05:29<03:57, 45.01it/s]

Ep 14300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.4, First Action 4


 58%|█████▊    | 14405/25000 [05:31<03:48, 46.39it/s]

Ep 14400/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 12.9, First Action 4


 58%|█████▊    | 14505/25000 [05:33<03:41, 47.41it/s]

Ep 14500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 58%|█████▊    | 14608/25000 [05:35<03:37, 47.70it/s]

Ep 14600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 59%|█████▉    | 14706/25000 [05:37<04:35, 37.33it/s]

Ep 14700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 4


 59%|█████▉    | 14803/25000 [05:40<05:01, 33.82it/s]

Ep 14800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 60%|█████▉    | 14905/25000 [05:42<03:31, 47.74it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.7, First Action 4


 60%|██████    | 15005/25000 [05:45<03:36, 46.15it/s]

Ep 15000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.4, First Action 4


 60%|██████    | 15105/25000 [05:47<03:32, 46.56it/s]

Ep 15100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 61%|██████    | 15205/25000 [05:49<03:32, 46.00it/s]

Ep 15200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 61%|██████    | 15307/25000 [05:51<04:21, 37.12it/s]

Ep 15300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.2, First Action 4


 62%|██████▏   | 15407/25000 [05:54<04:33, 35.08it/s]

Ep 15400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 62%|██████▏   | 15506/25000 [05:56<03:18, 47.73it/s]

Ep 15500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 4


 62%|██████▏   | 15609/25000 [05:58<03:20, 46.92it/s]

Ep 15600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 4


 63%|██████▎   | 15709/25000 [06:00<03:16, 47.32it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 63%|██████▎   | 15804/25000 [06:02<03:21, 45.70it/s]

Ep 15800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 4


 64%|██████▎   | 15907/25000 [06:05<04:09, 36.51it/s]

Ep 15900/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.4, First Action 4


 64%|██████▍   | 16005/25000 [06:08<04:34, 32.74it/s]

Ep 16000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 4


 64%|██████▍   | 16108/25000 [06:10<03:06, 47.77it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 65%|██████▍   | 16209/25000 [06:12<03:03, 48.00it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.1, First Action 4


 65%|██████▌   | 16305/25000 [06:14<02:56, 49.18it/s]

Ep 16300/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.6, First Action 4


 66%|██████▌   | 16406/25000 [06:16<02:57, 48.45it/s]

Ep 16400/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 4


 66%|██████▌   | 16507/25000 [06:18<03:50, 36.92it/s]

Ep 16500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 66%|██████▋   | 16607/25000 [06:21<03:59, 35.01it/s]

Ep 16600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 67%|██████▋   | 16709/25000 [06:23<02:57, 46.82it/s]

Ep 16700/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.2, First Action 4


 67%|██████▋   | 16804/25000 [06:25<02:55, 46.70it/s]

Ep 16800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 4


 68%|██████▊   | 16904/25000 [06:28<03:04, 43.91it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 68%|██████▊   | 17009/25000 [06:30<02:53, 46.11it/s]

Ep 17000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 68%|██████▊   | 17104/25000 [06:32<03:21, 39.16it/s]

Ep 17100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 4


 69%|██████▉   | 17204/25000 [06:35<03:42, 35.07it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.0, First Action 4


 69%|██████▉   | 17308/25000 [06:37<02:44, 46.65it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.3, First Action 4


 70%|██████▉   | 17408/25000 [06:39<02:44, 46.11it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 12.6, First Action 4


 70%|███████   | 17508/25000 [06:42<02:39, 47.12it/s]

Ep 17500/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.2, First Action 4


 70%|███████   | 17609/25000 [06:44<02:39, 46.44it/s]

Ep 17600/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.5, First Action 4


 71%|███████   | 17704/25000 [06:46<03:09, 38.55it/s]

Ep 17700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 4


 71%|███████   | 17806/25000 [06:49<03:39, 32.84it/s]

Ep 17800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 72%|███████▏  | 17909/25000 [06:51<02:28, 47.69it/s]

Ep 17900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.9, First Action 4


 72%|███████▏  | 18009/25000 [06:53<02:27, 47.27it/s]

Ep 18000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 4


 72%|███████▏  | 18106/25000 [06:55<02:22, 48.38it/s]

Ep 18100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 4


 73%|███████▎  | 18207/25000 [06:57<02:24, 47.17it/s]

Ep 18200/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 4


 73%|███████▎  | 18302/25000 [06:59<02:41, 41.46it/s]

Ep 18300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 74%|███████▎  | 18405/25000 [07:02<03:14, 33.99it/s]

Ep 18400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 74%|███████▍  | 18508/25000 [07:05<02:17, 47.34it/s]

Ep 18500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 74%|███████▍  | 18605/25000 [07:07<02:14, 47.56it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 75%|███████▍  | 18705/25000 [07:09<02:14, 46.69it/s]

Ep 18700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 75%|███████▌  | 18806/25000 [07:11<02:12, 46.91it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 4


 76%|███████▌  | 18902/25000 [07:13<02:07, 47.97it/s]

Ep 18900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.7, First Action 4


 76%|███████▌  | 19006/25000 [07:16<02:47, 35.89it/s]

Ep 19000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 4


 76%|███████▋  | 19105/25000 [07:18<02:01, 48.43it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.9, First Action 4


 77%|███████▋  | 19206/25000 [07:20<01:58, 48.90it/s]

Ep 19200/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 4


 77%|███████▋  | 19306/25000 [07:22<02:03, 46.09it/s]

Ep 19300/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 4


 78%|███████▊  | 19408/25000 [07:25<02:04, 45.06it/s]

Ep 19400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 4


 78%|███████▊  | 19503/25000 [07:27<02:01, 45.41it/s]

Ep 19500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 4


 78%|███████▊  | 19605/25000 [07:29<02:39, 33.82it/s]

Ep 19600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.9, First Action 4


 79%|███████▉  | 19708/25000 [07:32<01:50, 47.86it/s]

Ep 19700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.9, First Action 4


 79%|███████▉  | 19808/25000 [07:34<01:51, 46.61it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 80%|███████▉  | 19908/25000 [07:36<01:46, 47.70it/s]

Ep 19900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 80%|████████  | 20008/25000 [07:38<01:46, 46.70it/s]

Ep 20000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 80%|████████  | 20108/25000 [07:40<01:48, 45.23it/s]

Ep 20100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.2, First Action 4


 81%|████████  | 20204/25000 [07:43<02:15, 35.29it/s]

Ep 20200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 81%|████████  | 20309/25000 [07:46<01:41, 46.07it/s]

Ep 20300/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.0, First Action 4


 82%|████████▏ | 20409/25000 [07:48<01:37, 46.98it/s]

Ep 20400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 82%|████████▏ | 20506/25000 [07:50<01:35, 46.98it/s]

Ep 20500/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.4, First Action 4


 82%|████████▏ | 20607/25000 [07:52<01:31, 47.81it/s]

Ep 20600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 4


 83%|████████▎ | 20709/25000 [07:54<01:28, 48.73it/s]

Ep 20700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 4


 83%|████████▎ | 20805/25000 [07:57<01:49, 38.36it/s]

Ep 20800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 84%|████████▎ | 20908/25000 [07:59<01:24, 48.29it/s]

Ep 20900/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 4


 84%|████████▍ | 21009/25000 [08:01<01:25, 46.62it/s]

Ep 21000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 84%|████████▍ | 21105/25000 [08:03<01:20, 48.47it/s]

Ep 21100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 85%|████████▍ | 21205/25000 [08:05<01:22, 45.93it/s]

Ep 21200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 4


 85%|████████▌ | 21305/25000 [08:08<01:18, 46.90it/s]

Ep 21300/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 4


 86%|████████▌ | 21408/25000 [08:10<01:31, 39.35it/s]

Ep 21400/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.4, First Action 4


 86%|████████▌ | 21509/25000 [08:13<01:13, 47.26it/s]

Ep 21500/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 4


 86%|████████▋ | 21609/25000 [08:15<01:12, 46.55it/s]

Ep 21600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 87%|████████▋ | 21709/25000 [08:17<01:08, 47.81it/s]

Ep 21700/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 4


 87%|████████▋ | 21809/25000 [08:19<01:06, 47.68it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 4


 88%|████████▊ | 21909/25000 [08:21<01:05, 47.39it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.4, First Action 4


 88%|████████▊ | 22005/25000 [08:24<01:20, 37.20it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 88%|████████▊ | 22106/25000 [08:26<01:04, 44.69it/s]

Ep 22100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 89%|████████▉ | 22207/25000 [08:29<00:58, 47.68it/s]

Ep 22200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 89%|████████▉ | 22309/25000 [08:31<00:56, 47.54it/s]

Ep 22300/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.7, First Action 4


 90%|████████▉ | 22409/25000 [08:33<00:54, 47.36it/s]

Ep 22400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 90%|█████████ | 22509/25000 [08:35<00:53, 46.76it/s]

Ep 22500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 4


 90%|█████████ | 22606/25000 [08:38<01:07, 35.46it/s]

Ep 22600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.5, First Action 4


 91%|█████████ | 22709/25000 [08:40<00:49, 46.06it/s]

Ep 22700/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.8, First Action 4


 91%|█████████ | 22805/25000 [08:42<00:45, 47.94it/s]

Ep 22800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 12.8, First Action 4


 92%|█████████▏| 22906/25000 [08:44<00:44, 47.51it/s]

Ep 22900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 12.6, First Action 4


 92%|█████████▏| 23007/25000 [08:47<00:43, 45.73it/s]

Ep 23000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 92%|█████████▏| 23107/25000 [08:49<00:42, 44.14it/s]

Ep 23100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 93%|█████████▎| 23204/25000 [08:51<00:46, 38.28it/s]

Ep 23200/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 93%|█████████▎| 23307/25000 [08:54<00:37, 45.66it/s]

Ep 23300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 94%|█████████▎| 23407/25000 [08:56<00:33, 46.88it/s]

Ep 23400/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.9, First Action 4


 94%|█████████▍| 23507/25000 [08:58<00:31, 47.19it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 94%|█████████▍| 23607/25000 [09:00<00:30, 46.10it/s]

Ep 23600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 95%|█████████▍| 23707/25000 [09:02<00:27, 46.18it/s]

Ep 23700/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.9, First Action 4


 95%|█████████▌| 23804/25000 [09:05<00:32, 36.27it/s]

Ep 23800/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 4


 96%|█████████▌| 23909/25000 [09:08<00:23, 46.64it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 96%|█████████▌| 24005/25000 [09:10<00:21, 47.01it/s]

Ep 24000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 4


 96%|█████████▋| 24105/25000 [09:12<00:19, 46.44it/s]

Ep 24100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 4


 97%|█████████▋| 24205/25000 [09:14<00:16, 47.61it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.1, First Action 4


 97%|█████████▋| 24305/25000 [09:16<00:14, 46.71it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.1, First Action 4


 98%|█████████▊| 24406/25000 [09:19<00:16, 36.37it/s]

Ep 24400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.0, First Action 4


 98%|█████████▊| 24506/25000 [09:22<00:11, 44.69it/s]

Ep 24500/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.6, First Action 4


 98%|█████████▊| 24607/25000 [09:24<00:08, 46.64it/s]

Ep 24600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 12.8, First Action 4


 99%|█████████▉| 24708/25000 [09:26<00:06, 47.67it/s]

Ep 24700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 4


 99%|█████████▉| 24808/25000 [09:28<00:04, 46.07it/s]

Ep 24800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


100%|█████████▉| 24908/25000 [09:30<00:01, 46.67it/s]

Ep 24900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 4


100%|██████████| 25000/25000 [09:33<00:00, 43.63it/s]


Ep 25000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 4

TEST:


 38%|███▊      | 113/300 [00:01<00:01, 107.26it/s]

Ep 100/300, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.3, First Action 4


 75%|███████▌  | 226/300 [00:01<00:00, 146.87it/s]

Ep 200/300, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 4


100%|██████████| 300/300 [00:02<00:00, 122.53it/s]


Ep 300/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4

GAMMA 1 - LR 0.0001 - Entropy Decay False


  0%|          | 105/25000 [00:02<08:50, 46.97it/s]

Ep 100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 8


  1%|          | 205/25000 [00:04<08:52, 46.53it/s]

Ep 200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


  1%|          | 305/25000 [00:06<08:51, 46.46it/s]

Ep 300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  2%|▏         | 405/25000 [00:08<08:47, 46.64it/s]

Ep 400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


  2%|▏         | 506/25000 [00:11<10:52, 37.52it/s]

Ep 500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  2%|▏         | 606/25000 [00:14<11:14, 36.15it/s]

Ep 600/25000, Opt. Action: 0, Reward: 12.100000001490116, Cumulative-Regret: 12.899999998509884, AVG100-Regret: 18.0, First Action 9


  3%|▎         | 707/25000 [00:17<08:46, 46.13it/s]

Ep 700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  3%|▎         | 808/25000 [00:19<08:22, 48.13it/s]

Ep 800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


  4%|▎         | 909/25000 [00:21<08:24, 47.78it/s]

Ep 900/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.4, First Action 7


  4%|▍         | 1009/25000 [00:23<08:30, 47.01it/s]

Ep 1000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


  4%|▍         | 1105/25000 [00:25<10:30, 37.89it/s]

Ep 1100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 10


  5%|▍         | 1206/25000 [00:28<11:20, 34.98it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


  5%|▌         | 1305/25000 [00:30<08:17, 47.65it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


  6%|▌         | 1407/25000 [00:33<08:24, 46.72it/s]

Ep 1400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


  6%|▌         | 1507/25000 [00:35<08:32, 45.80it/s]

Ep 1500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  6%|▋         | 1607/25000 [00:37<08:22, 46.54it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  7%|▋         | 1705/25000 [00:39<10:18, 37.66it/s]

Ep 1700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  7%|▋         | 1806/25000 [00:42<10:55, 35.40it/s]

Ep 1800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


  8%|▊         | 1908/25000 [00:44<08:16, 46.52it/s]

Ep 1900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 6


  8%|▊         | 2008/25000 [00:46<08:07, 47.21it/s]

Ep 2000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


  8%|▊         | 2108/25000 [00:48<08:23, 45.44it/s]

Ep 2100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


  9%|▉         | 2208/25000 [00:51<08:10, 46.50it/s]

Ep 2200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


  9%|▉         | 2304/25000 [00:53<09:44, 38.80it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 10%|▉         | 2404/25000 [00:56<11:38, 32.33it/s]

Ep 2400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 0


 10%|█         | 2509/25000 [00:58<07:51, 47.70it/s]

Ep 2500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 10%|█         | 2609/25000 [01:00<07:48, 47.76it/s]

Ep 2600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 11%|█         | 2709/25000 [01:02<07:47, 47.71it/s]

Ep 2700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 11%|█         | 2809/25000 [01:04<07:53, 46.84it/s]

Ep 2800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 12%|█▏        | 2905/25000 [01:07<09:27, 38.90it/s]

Ep 2900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 12%|█▏        | 3005/25000 [01:09<10:54, 33.61it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 12%|█▏        | 3105/25000 [01:12<07:46, 46.92it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 13%|█▎        | 3205/25000 [01:14<07:40, 47.30it/s]

Ep 3200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 13%|█▎        | 3305/25000 [01:16<07:36, 47.49it/s]

Ep 3300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 8


 14%|█▎        | 3405/25000 [01:18<07:32, 47.72it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 14%|█▍        | 3506/25000 [01:20<09:11, 38.97it/s]

Ep 3500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 14%|█▍        | 3605/25000 [01:23<10:04, 35.39it/s]

Ep 3600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 15%|█▍        | 3708/25000 [01:25<08:07, 43.64it/s]

Ep 3700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 15%|█▌        | 3805/25000 [01:27<07:31, 46.90it/s]

Ep 3800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 16%|█▌        | 3905/25000 [01:30<07:41, 45.73it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 16%|█▌        | 4005/25000 [01:32<07:37, 45.86it/s]

Ep 4000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 4


 16%|█▋        | 4105/25000 [01:34<07:46, 44.80it/s]

Ep 4100/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 0


 17%|█▋        | 4206/25000 [01:37<09:51, 35.15it/s]

Ep 4200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 17%|█▋        | 4308/25000 [01:39<07:15, 47.47it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 18%|█▊        | 4405/25000 [01:41<07:12, 47.66it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 6


 18%|█▊        | 4505/25000 [01:43<07:06, 48.10it/s]

Ep 4500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 18%|█▊        | 4605/25000 [01:45<07:25, 45.78it/s]

Ep 4600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 19%|█▉        | 4705/25000 [01:48<07:08, 47.36it/s]

Ep 4700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 19%|█▉        | 4805/25000 [01:50<09:45, 34.48it/s]

Ep 4800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 20%|█▉        | 4910/25000 [01:53<06:53, 48.58it/s]

Ep 4900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 20%|██        | 5006/25000 [01:55<07:04, 47.10it/s]

Ep 5000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 20%|██        | 5106/25000 [01:57<06:56, 47.78it/s]

Ep 5100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 21%|██        | 5206/25000 [01:59<06:55, 47.60it/s]

Ep 5200/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 8


 21%|██        | 5306/25000 [02:01<07:12, 45.58it/s]

Ep 5300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 8


 22%|██▏       | 5404/25000 [02:04<09:22, 34.81it/s]

Ep 5400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 22%|██▏       | 5508/25000 [02:07<06:53, 47.19it/s]

Ep 5500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 22%|██▏       | 5608/25000 [02:09<06:51, 47.08it/s]

Ep 5600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 23%|██▎       | 5708/25000 [02:11<06:42, 47.97it/s]

Ep 5700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 23%|██▎       | 5808/25000 [02:13<06:38, 48.22it/s]

Ep 5800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 0


 24%|██▎       | 5908/25000 [02:15<06:42, 47.38it/s]

Ep 5900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 24%|██▍       | 6004/25000 [02:18<08:46, 36.08it/s]

Ep 6000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 24%|██▍       | 6107/25000 [02:20<06:35, 47.81it/s]

Ep 6100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 25%|██▍       | 6208/25000 [02:22<06:57, 45.00it/s]

Ep 6200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


 25%|██▌       | 6308/25000 [02:25<06:48, 45.75it/s]

Ep 6300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


 26%|██▌       | 6408/25000 [02:27<06:41, 46.30it/s]

Ep 6400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 26%|██▌       | 6508/25000 [02:29<06:37, 46.51it/s]

Ep 6500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 26%|██▋       | 6605/25000 [02:32<08:43, 35.14it/s]

Ep 6600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 27%|██▋       | 6705/25000 [02:34<06:30, 46.91it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 27%|██▋       | 6805/25000 [02:36<06:23, 47.44it/s]

Ep 6800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 28%|██▊       | 6905/25000 [02:38<06:37, 45.51it/s]

Ep 6900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 28%|██▊       | 7005/25000 [02:41<06:27, 46.40it/s]

Ep 7000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


 28%|██▊       | 7105/25000 [02:43<06:24, 46.57it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 29%|██▉       | 7205/25000 [02:45<08:41, 34.11it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 6


 29%|██▉       | 7305/25000 [02:48<06:14, 47.21it/s]

Ep 7300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 30%|██▉       | 7405/25000 [02:50<06:11, 47.39it/s]

Ep 7400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 30%|███       | 7506/25000 [02:52<06:02, 48.21it/s]

Ep 7500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 8


 30%|███       | 7606/25000 [02:54<06:01, 48.18it/s]

Ep 7600/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 7


 31%|███       | 7707/25000 [02:56<06:17, 45.79it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 31%|███       | 7806/25000 [02:59<07:47, 36.77it/s]

Ep 7800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 32%|███▏      | 7906/25000 [03:02<06:03, 47.04it/s]

Ep 7900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 32%|███▏      | 8007/25000 [03:04<05:56, 47.62it/s]

Ep 8000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.6, First Action 8


 32%|███▏      | 8107/25000 [03:06<06:05, 46.24it/s]

Ep 8100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 33%|███▎      | 8207/25000 [03:08<05:54, 47.37it/s]

Ep 8200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 33%|███▎      | 8308/25000 [03:10<05:56, 46.83it/s]

Ep 8300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 34%|███▎      | 8406/25000 [03:13<07:41, 35.94it/s]

Ep 8400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 34%|███▍      | 8508/25000 [03:15<05:53, 46.59it/s]

Ep 8500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 34%|███▍      | 8605/25000 [03:17<06:03, 45.11it/s]

Ep 8600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 35%|███▍      | 8705/25000 [03:20<05:50, 46.44it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 35%|███▌      | 8806/25000 [03:22<05:39, 47.69it/s]

Ep 8800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 36%|███▌      | 8908/25000 [03:24<05:40, 47.29it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 36%|███▌      | 9006/25000 [03:26<07:30, 35.50it/s]

Ep 9000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.7, First Action 8


 36%|███▋      | 9108/25000 [03:29<05:38, 47.00it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 37%|███▋      | 9208/25000 [03:31<05:31, 47.58it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 37%|███▋      | 9308/25000 [03:33<05:32, 47.18it/s]

Ep 9300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 38%|███▊      | 9409/25000 [03:36<05:31, 47.00it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 38%|███▊      | 9509/25000 [03:38<05:29, 47.05it/s]

Ep 9500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 38%|███▊      | 9604/25000 [03:40<06:52, 37.31it/s]

Ep 9600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 5


 39%|███▉      | 9705/25000 [03:43<05:34, 45.77it/s]

Ep 9700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 39%|███▉      | 9805/25000 [03:45<05:13, 48.46it/s]

Ep 9800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 40%|███▉      | 9908/25000 [03:47<05:08, 48.96it/s]

Ep 9900/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 2


 40%|████      | 10008/25000 [03:49<05:14, 47.69it/s]

Ep 10000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 40%|████      | 10108/25000 [03:51<05:11, 47.82it/s]

Ep 10100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 41%|████      | 10204/25000 [03:54<06:40, 36.91it/s]

Ep 10200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 8


 41%|████      | 10305/25000 [03:56<05:19, 45.92it/s]

Ep 10300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 42%|████▏     | 10406/25000 [03:59<05:08, 47.38it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 5


 42%|████▏     | 10507/25000 [04:01<05:08, 46.98it/s]

Ep 10500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 42%|████▏     | 10607/25000 [04:03<05:11, 46.28it/s]

Ep 10600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 43%|████▎     | 10707/25000 [04:05<05:06, 46.65it/s]

Ep 10700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 43%|████▎     | 10807/25000 [04:08<06:40, 35.44it/s]

Ep 10800/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 44%|████▎     | 10907/25000 [04:10<05:10, 45.43it/s]

Ep 10900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 44%|████▍     | 11007/25000 [04:12<05:01, 46.36it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.4, First Action 8


 44%|████▍     | 11107/25000 [04:14<05:05, 45.53it/s]

Ep 11100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 8


 45%|████▍     | 11207/25000 [04:17<04:56, 46.59it/s]

Ep 11200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 45%|████▌     | 11308/25000 [04:19<04:55, 46.32it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 2


 46%|████▌     | 11405/25000 [04:21<06:19, 35.78it/s]

Ep 11400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 46%|████▌     | 11508/25000 [04:24<04:55, 45.72it/s]

Ep 11500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 46%|████▋     | 11608/25000 [04:26<04:43, 47.17it/s]

Ep 11600/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 7


 47%|████▋     | 11708/25000 [04:28<04:43, 46.89it/s]

Ep 11700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 47%|████▋     | 11808/25000 [04:31<04:43, 46.47it/s]

Ep 11800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 48%|████▊     | 11908/25000 [04:33<04:38, 47.00it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 17.9, First Action 10


 48%|████▊     | 12005/25000 [04:35<05:59, 36.17it/s]

Ep 12000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 48%|████▊     | 12105/25000 [04:38<05:53, 36.48it/s]

Ep 12100/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 9


 49%|████▉     | 12206/25000 [04:41<05:39, 37.65it/s]

Ep 12200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 49%|████▉     | 12306/25000 [04:43<04:26, 47.55it/s]

Ep 12300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 50%|████▉     | 12407/25000 [04:45<04:25, 47.43it/s]

Ep 12400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 50%|█████     | 12502/25000 [04:47<04:43, 44.09it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 50%|█████     | 12606/25000 [04:50<05:47, 35.70it/s]

Ep 12600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 51%|█████     | 12707/25000 [04:53<04:20, 47.21it/s]

Ep 12700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 51%|█████     | 12807/25000 [04:55<04:14, 47.95it/s]

Ep 12800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 52%|█████▏    | 12907/25000 [04:57<04:09, 48.39it/s]

Ep 12900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 7


 52%|█████▏    | 13007/25000 [04:59<04:19, 46.18it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 52%|█████▏    | 13107/25000 [05:01<04:13, 46.91it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 53%|█████▎    | 13206/25000 [05:04<05:28, 35.85it/s]

Ep 13200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 53%|█████▎    | 13305/25000 [05:06<04:14, 45.92it/s]

Ep 13300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 54%|█████▎    | 13405/25000 [05:09<04:11, 46.12it/s]

Ep 13400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 54%|█████▍    | 13506/25000 [05:11<04:03, 47.29it/s]

Ep 13500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 54%|█████▍    | 13607/25000 [05:13<04:05, 46.33it/s]

Ep 13600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 55%|█████▍    | 13708/25000 [05:15<03:51, 48.78it/s]

Ep 13700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 3


 55%|█████▌    | 13806/25000 [05:18<05:12, 35.86it/s]

Ep 13800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 56%|█████▌    | 13905/25000 [05:20<03:55, 47.03it/s]

Ep 13900/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 56%|█████▌    | 14005/25000 [05:22<03:54, 46.95it/s]

Ep 14000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 56%|█████▋    | 14105/25000 [05:24<03:55, 46.30it/s]

Ep 14100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 8


 57%|█████▋    | 14205/25000 [05:27<03:53, 46.31it/s]

Ep 14200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 57%|█████▋    | 14305/25000 [05:29<03:48, 46.73it/s]

Ep 14300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 58%|█████▊    | 14403/25000 [05:31<05:00, 35.31it/s]

Ep 14400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 58%|█████▊    | 14509/25000 [05:34<03:41, 47.26it/s]

Ep 14500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 58%|█████▊    | 14609/25000 [05:36<03:39, 47.26it/s]

Ep 14600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 59%|█████▉    | 14709/25000 [05:38<03:36, 47.53it/s]

Ep 14700/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 7


 59%|█████▉    | 14809/25000 [05:40<03:38, 46.68it/s]

Ep 14800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 60%|█████▉    | 14904/25000 [05:42<03:43, 45.16it/s]

Ep 14900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 60%|██████    | 15006/25000 [05:45<04:48, 34.59it/s]

Ep 15000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 60%|██████    | 15106/25000 [05:48<03:29, 47.17it/s]

Ep 15100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 61%|██████    | 15206/25000 [05:50<03:36, 45.27it/s]

Ep 15200/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 7


 61%|██████    | 15306/25000 [05:52<03:28, 46.47it/s]

Ep 15300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 62%|██████▏   | 15406/25000 [05:54<03:27, 46.17it/s]

Ep 15400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 62%|██████▏   | 15506/25000 [05:56<03:30, 45.00it/s]

Ep 15500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 7


 62%|██████▏   | 15605/25000 [05:59<04:45, 32.92it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 63%|██████▎   | 15705/25000 [06:02<03:19, 46.66it/s]

Ep 15700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 63%|██████▎   | 15805/25000 [06:04<03:17, 46.58it/s]

Ep 15800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 64%|██████▎   | 15905/25000 [06:06<03:16, 46.29it/s]

Ep 15900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


 64%|██████▍   | 16005/25000 [06:08<03:09, 47.36it/s]

Ep 16000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 64%|██████▍   | 16105/25000 [06:10<03:06, 47.60it/s]

Ep 16100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 65%|██████▍   | 16204/25000 [06:13<04:12, 34.86it/s]

Ep 16200/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.9, First Action 7


 65%|██████▌   | 16309/25000 [06:16<03:05, 46.77it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 7


 66%|██████▌   | 16409/25000 [06:18<03:07, 45.88it/s]

Ep 16400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


 66%|██████▌   | 16509/25000 [06:20<03:06, 45.60it/s]

Ep 16500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 66%|██████▋   | 16609/25000 [06:22<02:59, 46.74it/s]

Ep 16600/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 3


 67%|██████▋   | 16704/25000 [06:24<03:00, 46.00it/s]

Ep 16700/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 67%|██████▋   | 16803/25000 [06:27<04:07, 33.16it/s]

Ep 16800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 68%|██████▊   | 16908/25000 [06:30<02:56, 45.80it/s]

Ep 16900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 68%|██████▊   | 17008/25000 [06:32<02:52, 46.28it/s]

Ep 17000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 68%|██████▊   | 17108/25000 [06:34<02:48, 46.80it/s]

Ep 17100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 69%|██████▉   | 17208/25000 [06:36<02:48, 46.30it/s]

Ep 17200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 69%|██████▉   | 17303/25000 [06:38<02:45, 46.55it/s]

Ep 17300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 70%|██████▉   | 17406/25000 [06:41<03:50, 32.89it/s]

Ep 17400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 70%|███████   | 17509/25000 [06:44<02:38, 47.17it/s]

Ep 17500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 70%|███████   | 17609/25000 [06:46<02:41, 45.85it/s]

Ep 17600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 71%|███████   | 17709/25000 [06:48<02:38, 46.05it/s]

Ep 17700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 71%|███████   | 17809/25000 [06:50<02:35, 46.15it/s]

Ep 17800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 72%|███████▏  | 17904/25000 [06:52<03:07, 37.86it/s]

Ep 17900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 72%|███████▏  | 18004/25000 [06:55<03:26, 33.92it/s]

Ep 18000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 72%|███████▏  | 18107/25000 [06:58<02:30, 45.77it/s]

Ep 18100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 73%|███████▎  | 18207/25000 [07:00<02:25, 46.83it/s]

Ep 18200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 73%|███████▎  | 18307/25000 [07:02<02:22, 47.04it/s]

Ep 18300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 74%|███████▎  | 18407/25000 [07:04<02:20, 46.92it/s]

Ep 18400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 74%|███████▍  | 18502/25000 [07:06<02:35, 41.67it/s]

Ep 18500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 74%|███████▍  | 18605/25000 [07:09<03:06, 34.21it/s]

Ep 18600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 75%|███████▍  | 18708/25000 [07:12<02:14, 46.92it/s]

Ep 18700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 75%|███████▌  | 18809/25000 [07:14<02:07, 48.50it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.8, First Action 3


 76%|███████▌  | 18909/25000 [07:16<02:09, 46.96it/s]

Ep 18900/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 8


 76%|███████▌  | 19005/25000 [07:18<02:07, 47.19it/s]

Ep 19000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 76%|███████▋  | 19105/25000 [07:20<02:10, 45.34it/s]

Ep 19100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 77%|███████▋  | 19207/25000 [07:23<02:44, 35.20it/s]

Ep 19200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 4


 77%|███████▋  | 19308/25000 [07:25<02:07, 44.56it/s]

Ep 19300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 78%|███████▊  | 19408/25000 [07:28<02:03, 45.13it/s]

Ep 19400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 3


 78%|███████▊  | 19508/25000 [07:30<01:56, 47.05it/s]

Ep 19500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 78%|███████▊  | 19608/25000 [07:32<01:54, 46.94it/s]

Ep 19600/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.6, First Action 8


 79%|███████▉  | 19704/25000 [07:34<01:55, 45.91it/s]

Ep 19700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 79%|███████▉  | 19804/25000 [07:37<02:33, 33.85it/s]

Ep 19800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 80%|███████▉  | 19905/25000 [07:39<01:48, 46.86it/s]

Ep 19900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 80%|████████  | 20005/25000 [07:41<01:49, 45.79it/s]

Ep 20000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 80%|████████  | 20105/25000 [07:44<01:49, 44.80it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 1


 81%|████████  | 20205/25000 [07:46<01:43, 46.48it/s]

Ep 20200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 81%|████████  | 20305/25000 [07:48<01:52, 41.58it/s]

Ep 20300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 82%|████████▏ | 20404/25000 [07:51<02:09, 35.44it/s]

Ep 20400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 82%|████████▏ | 20506/25000 [07:53<01:37, 46.04it/s]

Ep 20500/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.3, First Action 8


 82%|████████▏ | 20606/25000 [07:55<01:33, 46.89it/s]

Ep 20600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 83%|████████▎ | 20707/25000 [07:57<01:31, 46.68it/s]

Ep 20700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 7


 83%|████████▎ | 20807/25000 [08:00<01:30, 46.18it/s]

Ep 20800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 84%|████████▎ | 20907/25000 [08:02<01:28, 46.05it/s]

Ep 20900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 84%|████████▍ | 21004/25000 [08:05<01:57, 34.00it/s]

Ep 21000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 4


 84%|████████▍ | 21109/25000 [08:07<01:21, 47.93it/s]

Ep 21100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 8


 85%|████████▍ | 21209/25000 [08:09<01:20, 47.03it/s]

Ep 21200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 85%|████████▌ | 21309/25000 [08:11<01:20, 46.13it/s]

Ep 21300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 86%|████████▌ | 21409/25000 [08:13<01:16, 47.12it/s]

Ep 21400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 86%|████████▌ | 21509/25000 [08:16<01:15, 46.51it/s]

Ep 21500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 86%|████████▋ | 21603/25000 [08:18<01:39, 34.04it/s]

Ep 21600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 87%|████████▋ | 21706/25000 [08:21<01:11, 46.21it/s]

Ep 21700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 87%|████████▋ | 21806/25000 [08:23<01:07, 47.55it/s]

Ep 21800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 88%|████████▊ | 21906/25000 [08:25<01:05, 47.30it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 3


 88%|████████▊ | 22006/25000 [08:27<01:03, 47.41it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 88%|████████▊ | 22106/25000 [08:29<01:05, 44.17it/s]

Ep 22100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


 89%|████████▉ | 22206/25000 [08:32<01:19, 35.23it/s]

Ep 22200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 89%|████████▉ | 22306/25000 [08:35<00:57, 46.63it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 90%|████████▉ | 22406/25000 [08:37<00:54, 47.56it/s]

Ep 22400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 90%|█████████ | 22506/25000 [08:39<00:53, 46.26it/s]

Ep 22500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 90%|█████████ | 22606/25000 [08:41<00:51, 46.74it/s]

Ep 22600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 91%|█████████ | 22706/25000 [08:43<00:49, 46.51it/s]

Ep 22700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 91%|█████████ | 22804/25000 [08:46<01:02, 35.16it/s]

Ep 22800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 92%|█████████▏| 22905/25000 [08:48<00:46, 45.01it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 92%|█████████▏| 23005/25000 [08:51<00:43, 45.84it/s]

Ep 23000/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 7


 92%|█████████▏| 23106/25000 [08:53<00:40, 47.16it/s]

Ep 23100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 93%|█████████▎| 23206/25000 [08:55<00:37, 47.79it/s]

Ep 23200/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 4


 93%|█████████▎| 23306/25000 [08:57<00:35, 47.89it/s]

Ep 23300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 94%|█████████▎| 23406/25000 [09:00<00:47, 33.63it/s]

Ep 23400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 94%|█████████▍| 23506/25000 [09:02<00:31, 47.92it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 94%|█████████▍| 23605/25000 [09:05<00:36, 38.09it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


 95%|█████████▍| 23705/25000 [09:07<00:29, 44.46it/s]

Ep 23700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 95%|█████████▌| 23805/25000 [09:10<00:25, 46.40it/s]

Ep 23800/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.7, First Action 8


 96%|█████████▌| 23905/25000 [09:12<00:28, 38.26it/s]

Ep 23900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 96%|█████████▌| 24005/25000 [09:15<00:30, 32.97it/s]

Ep 24000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 96%|█████████▋| 24105/25000 [09:17<00:19, 46.67it/s]

Ep 24100/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 1


 97%|█████████▋| 24206/25000 [09:19<00:16, 47.53it/s]

Ep 24200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 97%|█████████▋| 24307/25000 [09:21<00:14, 47.89it/s]

Ep 24300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 98%|█████████▊| 24407/25000 [09:23<00:12, 45.80it/s]

Ep 24400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 98%|█████████▊| 24504/25000 [09:26<00:13, 37.17it/s]

Ep 24500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 98%|█████████▊| 24607/25000 [09:29<00:11, 33.96it/s]

Ep 24600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 99%|█████████▉| 24707/25000 [09:31<00:06, 46.31it/s]

Ep 24700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 99%|█████████▉| 24807/25000 [09:33<00:04, 46.80it/s]

Ep 24800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


100%|█████████▉| 24907/25000 [09:35<00:01, 47.83it/s]

Ep 24900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


100%|██████████| 25000/25000 [09:37<00:00, 43.29it/s]


Ep 25000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 7

TEST:


 40%|████      | 120/300 [00:00<00:01, 150.37it/s]

Ep 100/300, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 70%|███████   | 210/300 [00:01<00:00, 129.47it/s]

Ep 200/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


100%|██████████| 300/300 [00:02<00:00, 130.16it/s]


Ep 300/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7

GAMMA 0.9 - LR 0.0001 - Entropy Decay False


  0%|          | 104/25000 [00:02<10:42, 38.74it/s]

Ep 100/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.4, First Action 10


  1%|          | 209/25000 [00:05<08:57, 46.11it/s]

Ep 200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


  1%|          | 309/25000 [00:07<08:49, 46.67it/s]

Ep 300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


  2%|▏         | 409/25000 [00:09<08:43, 46.99it/s]

Ep 400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 5


  2%|▏         | 509/25000 [00:11<08:40, 47.09it/s]

Ep 500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 10


  2%|▏         | 605/25000 [00:14<10:32, 38.56it/s]

Ep 600/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 4


  3%|▎         | 709/25000 [00:16<09:31, 42.50it/s]

Ep 700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


  3%|▎         | 809/25000 [00:18<08:38, 46.61it/s]

Ep 800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


  4%|▎         | 909/25000 [00:21<09:03, 44.36it/s]

Ep 900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


  4%|▍         | 1009/25000 [00:23<08:28, 47.14it/s]

Ep 1000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  4%|▍         | 1109/25000 [00:25<08:20, 47.72it/s]

Ep 1100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


  5%|▍         | 1205/25000 [00:27<11:25, 34.69it/s]

Ep 1200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


  5%|▌         | 1307/25000 [00:30<08:47, 44.94it/s]

Ep 1300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


  6%|▌         | 1407/25000 [00:32<08:22, 46.95it/s]

Ep 1400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


  6%|▌         | 1507/25000 [00:34<08:23, 46.64it/s]

Ep 1500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  6%|▋         | 1607/25000 [00:37<08:30, 45.78it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


  7%|▋         | 1707/25000 [00:39<08:23, 46.30it/s]

Ep 1700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


  7%|▋         | 1806/25000 [00:41<10:43, 36.05it/s]

Ep 1800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


  8%|▊         | 1908/25000 [00:44<08:34, 44.86it/s]

Ep 1900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


  8%|▊         | 2008/25000 [00:46<08:10, 46.84it/s]

Ep 2000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  8%|▊         | 2108/25000 [00:48<08:18, 45.92it/s]

Ep 2100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


  9%|▉         | 2209/25000 [00:50<08:05, 46.99it/s]

Ep 2200/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.2, First Action 0


  9%|▉         | 2309/25000 [00:53<08:01, 47.09it/s]

Ep 2300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 10%|▉         | 2406/25000 [00:55<09:48, 38.40it/s]

Ep 2400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 10%|█         | 2508/25000 [00:58<08:24, 44.62it/s]

Ep 2500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 10%|█         | 2608/25000 [01:00<07:58, 46.77it/s]

Ep 2600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 11%|█         | 2708/25000 [01:02<08:02, 46.16it/s]

Ep 2700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 11%|█         | 2808/25000 [01:04<07:36, 48.60it/s]

Ep 2800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 12%|█▏        | 2908/25000 [01:06<07:56, 46.40it/s]

Ep 2900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 12%|█▏        | 3007/25000 [01:09<09:50, 37.28it/s]

Ep 3000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 12%|█▏        | 3107/25000 [01:12<08:15, 44.14it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 13%|█▎        | 3207/25000 [01:14<07:40, 47.35it/s]

Ep 3200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 13%|█▎        | 3307/25000 [01:16<07:34, 47.78it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 3


 14%|█▎        | 3407/25000 [01:18<07:36, 47.28it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 14%|█▍        | 3507/25000 [01:20<07:38, 46.88it/s]

Ep 3500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 2


 14%|█▍        | 3603/25000 [01:23<10:33, 33.76it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 15%|█▍        | 3708/25000 [01:25<08:02, 44.09it/s]

Ep 3700/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 15%|█▌        | 3808/25000 [01:28<07:47, 45.35it/s]

Ep 3800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 16%|█▌        | 3908/25000 [01:30<07:32, 46.58it/s]

Ep 3900/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 5


 16%|█▌        | 4008/25000 [01:32<07:26, 47.02it/s]

Ep 4000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 9


 16%|█▋        | 4108/25000 [01:34<07:15, 48.00it/s]

Ep 4100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 17%|█▋        | 4207/25000 [01:36<09:07, 38.01it/s]

Ep 4200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 17%|█▋        | 4307/25000 [01:39<07:58, 43.28it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 0


 18%|█▊        | 4408/25000 [01:41<07:15, 47.25it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 18%|█▊        | 4508/25000 [01:43<07:25, 46.04it/s]

Ep 4500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 18%|█▊        | 4608/25000 [01:46<07:14, 46.90it/s]

Ep 4600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 19%|█▉        | 4708/25000 [01:48<07:06, 47.61it/s]

Ep 4700/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 2


 19%|█▉        | 4805/25000 [01:50<08:39, 38.85it/s]

Ep 4800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 20%|█▉        | 4910/25000 [01:53<07:39, 43.67it/s]

Ep 4900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 20%|██        | 5005/25000 [01:55<06:58, 47.73it/s]

Ep 5000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 20%|██        | 5105/25000 [01:57<06:52, 48.22it/s]

Ep 5100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 21%|██        | 5205/25000 [01:59<06:58, 47.32it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 3


 21%|██        | 5305/25000 [02:01<06:56, 47.26it/s]

Ep 5300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 22%|██▏       | 5408/25000 [02:04<08:26, 38.66it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 22%|██▏       | 5507/25000 [02:07<07:44, 41.98it/s]

Ep 5500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 22%|██▏       | 5607/25000 [02:09<06:50, 47.23it/s]

Ep 5600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 23%|██▎       | 5707/25000 [02:11<06:56, 46.30it/s]

Ep 5700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 23%|██▎       | 5807/25000 [02:13<06:44, 47.51it/s]

Ep 5800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 24%|██▎       | 5907/25000 [02:15<06:53, 46.18it/s]

Ep 5900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 24%|██▍       | 6006/25000 [02:18<08:12, 38.57it/s]

Ep 6000/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.0, First Action 6


 24%|██▍       | 6108/25000 [02:21<07:46, 40.50it/s]

Ep 6100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


 25%|██▍       | 6208/25000 [02:23<06:53, 45.47it/s]

Ep 6200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 25%|██▌       | 6308/25000 [02:25<06:37, 47.08it/s]

Ep 6300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 1


 26%|██▌       | 6408/25000 [02:27<06:27, 48.00it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 2


 26%|██▌       | 6508/25000 [02:29<06:34, 46.87it/s]

Ep 6500/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 17.7, First Action 1


 26%|██▋       | 6605/25000 [02:32<08:26, 36.29it/s]

Ep 6600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 27%|██▋       | 6703/25000 [02:34<09:03, 33.69it/s]

Ep 6700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 27%|██▋       | 6808/25000 [02:37<06:33, 46.24it/s]

Ep 6800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 6


 28%|██▊       | 6908/25000 [02:39<06:37, 45.49it/s]

Ep 6900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 28%|██▊       | 7008/25000 [02:41<06:21, 47.19it/s]

Ep 7000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 28%|██▊       | 7108/25000 [02:43<06:27, 46.23it/s]

Ep 7100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 29%|██▉       | 7207/25000 [02:45<07:58, 37.22it/s]

Ep 7200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 5


 29%|██▉       | 7306/25000 [02:48<07:29, 39.38it/s]

Ep 7300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 30%|██▉       | 7406/25000 [02:50<06:20, 46.29it/s]

Ep 7400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 30%|███       | 7506/25000 [02:53<06:17, 46.32it/s]

Ep 7500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 30%|███       | 7606/25000 [02:55<06:18, 46.00it/s]

Ep 7600/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 6


 31%|███       | 7706/25000 [02:57<06:07, 47.04it/s]

Ep 7700/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 6


 31%|███       | 7804/25000 [02:59<07:46, 36.89it/s]

Ep 7800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 32%|███▏      | 7905/25000 [03:02<07:53, 36.11it/s]

Ep 7900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 32%|███▏      | 8005/25000 [03:04<06:13, 45.48it/s]

Ep 8000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 32%|███▏      | 8105/25000 [03:07<06:06, 46.10it/s]

Ep 8100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 3


 33%|███▎      | 8205/25000 [03:09<05:57, 46.96it/s]

Ep 8200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 33%|███▎      | 8305/25000 [03:11<06:06, 45.51it/s]

Ep 8300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 34%|███▎      | 8406/25000 [03:13<07:52, 35.10it/s]

Ep 8400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 34%|███▍      | 8509/25000 [03:16<06:48, 40.37it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 34%|███▍      | 8609/25000 [03:19<05:57, 45.86it/s]

Ep 8600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 35%|███▍      | 8709/25000 [03:21<05:49, 46.63it/s]

Ep 8700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 35%|███▌      | 8809/25000 [03:23<05:47, 46.53it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 36%|███▌      | 8909/25000 [03:25<05:49, 46.04it/s]

Ep 8900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 36%|███▌      | 9005/25000 [03:27<06:54, 38.56it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 1


 36%|███▋      | 9106/25000 [03:30<06:09, 43.05it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 37%|███▋      | 9206/25000 [03:32<05:45, 45.69it/s]

Ep 9200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 37%|███▋      | 9306/25000 [03:34<05:40, 46.08it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 38%|███▊      | 9406/25000 [03:37<05:36, 46.40it/s]

Ep 9400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 38%|███▊      | 9506/25000 [03:39<05:38, 45.82it/s]

Ep 9500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 38%|███▊      | 9605/25000 [03:41<07:07, 36.05it/s]

Ep 9600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 5


 39%|███▉      | 9709/25000 [03:44<05:56, 42.87it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 5


 39%|███▉      | 9804/25000 [03:46<05:32, 45.76it/s]

Ep 9800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 40%|███▉      | 9905/25000 [03:49<06:44, 37.32it/s]

Ep 9900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 40%|████      | 10005/25000 [03:52<05:36, 44.60it/s]

Ep 10000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 40%|████      | 10105/25000 [03:54<05:18, 46.74it/s]

Ep 10100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 41%|████      | 10206/25000 [03:57<06:57, 35.42it/s]

Ep 10200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 41%|████      | 10306/25000 [03:59<05:11, 47.23it/s]

Ep 10300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 42%|████▏     | 10406/25000 [04:01<05:11, 46.89it/s]

Ep 10400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 42%|████▏     | 10506/25000 [04:03<05:10, 46.66it/s]

Ep 10500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 42%|████▏     | 10606/25000 [04:05<05:10, 46.32it/s]

Ep 10600/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 1


 43%|████▎     | 10706/25000 [04:08<05:06, 46.62it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 43%|████▎     | 10805/25000 [04:10<07:00, 33.74it/s]

Ep 10800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 44%|████▎     | 10906/25000 [04:13<05:00, 46.95it/s]

Ep 10900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 44%|████▍     | 11006/25000 [04:15<04:54, 47.56it/s]

Ep 11000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 44%|████▍     | 11106/25000 [04:17<04:55, 46.95it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 45%|████▍     | 11206/25000 [04:19<04:56, 46.45it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 45%|████▌     | 11306/25000 [04:21<04:53, 46.64it/s]

Ep 11300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 46%|████▌     | 11406/25000 [04:24<06:34, 34.44it/s]

Ep 11400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 46%|████▌     | 11507/25000 [04:27<04:47, 46.91it/s]

Ep 11500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 46%|████▋     | 11607/25000 [04:29<04:53, 45.60it/s]

Ep 11600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 47%|████▋     | 11707/25000 [04:31<04:44, 46.78it/s]

Ep 11700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 47%|████▋     | 11807/25000 [04:33<04:44, 46.35it/s]

Ep 11800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 48%|████▊     | 11907/25000 [04:35<04:47, 45.54it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 48%|████▊     | 12004/25000 [04:38<06:05, 35.53it/s]

Ep 12000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 48%|████▊     | 12109/25000 [04:41<04:35, 46.77it/s]

Ep 12100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 49%|████▉     | 12209/25000 [04:43<04:30, 47.22it/s]

Ep 12200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 49%|████▉     | 12309/25000 [04:45<04:31, 46.81it/s]

Ep 12300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 50%|████▉     | 12409/25000 [04:47<04:25, 47.48it/s]

Ep 12400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 50%|█████     | 12509/25000 [04:49<04:25, 47.04it/s]

Ep 12500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 50%|█████     | 12604/25000 [04:52<05:57, 34.65it/s]

Ep 12600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 51%|█████     | 12708/25000 [04:54<04:24, 46.48it/s]

Ep 12700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 51%|█████     | 12808/25000 [04:57<04:20, 46.88it/s]

Ep 12800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 52%|█████▏    | 12908/25000 [04:59<04:21, 46.19it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 52%|█████▏    | 13008/25000 [05:01<04:13, 47.35it/s]

Ep 13000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 52%|█████▏    | 13108/25000 [05:03<04:16, 46.40it/s]

Ep 13100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 53%|█████▎    | 13203/25000 [05:06<05:37, 35.00it/s]

Ep 13200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 53%|█████▎    | 13307/25000 [05:08<04:22, 44.51it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 54%|█████▎    | 13407/25000 [05:11<04:15, 45.43it/s]

Ep 13400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.2, First Action 1


 54%|█████▍    | 13507/25000 [05:13<04:09, 46.15it/s]

Ep 13500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 54%|█████▍    | 13607/25000 [05:15<04:05, 46.47it/s]

Ep 13600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 55%|█████▍    | 13707/25000 [05:17<04:00, 46.89it/s]

Ep 13700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 5


 55%|█████▌    | 13804/25000 [05:20<05:31, 33.76it/s]

Ep 13800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 56%|█████▌    | 13909/25000 [05:22<04:04, 45.37it/s]

Ep 13900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 56%|█████▌    | 14009/25000 [05:25<03:58, 46.03it/s]

Ep 14000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 56%|█████▋    | 14109/25000 [05:27<03:47, 47.77it/s]

Ep 14100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 57%|█████▋    | 14209/25000 [05:29<03:55, 45.83it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 57%|█████▋    | 14304/25000 [05:31<03:47, 47.00it/s]

Ep 14300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 1


 58%|█████▊    | 14404/25000 [05:34<05:18, 33.28it/s]

Ep 14400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 58%|█████▊    | 14509/25000 [05:36<03:43, 47.04it/s]

Ep 14500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 58%|█████▊    | 14604/25000 [05:38<03:47, 45.76it/s]

Ep 14600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 59%|█████▉    | 14709/25000 [05:41<03:44, 45.77it/s]

Ep 14700/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.2, First Action 5


 59%|█████▉    | 14809/25000 [05:43<03:36, 47.11it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 2


 60%|█████▉    | 14904/25000 [05:45<03:54, 43.14it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 60%|██████    | 15004/25000 [05:48<04:45, 35.05it/s]

Ep 15000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 60%|██████    | 15107/25000 [05:50<03:33, 46.42it/s]

Ep 15100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 5


 61%|██████    | 15207/25000 [05:52<03:32, 46.06it/s]

Ep 15200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 61%|██████    | 15307/25000 [05:55<03:32, 45.62it/s]

Ep 15300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 62%|██████▏   | 15407/25000 [05:57<03:25, 46.68it/s]

Ep 15400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 62%|██████▏   | 15507/25000 [05:59<03:35, 44.08it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 62%|██████▏   | 15606/25000 [06:02<04:41, 33.33it/s]

Ep 15600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 63%|██████▎   | 15706/25000 [06:04<03:18, 46.70it/s]

Ep 15700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 63%|██████▎   | 15806/25000 [06:06<03:16, 46.89it/s]

Ep 15800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 6


 64%|██████▎   | 15906/25000 [06:08<03:15, 46.41it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 64%|██████▍   | 16006/25000 [06:11<03:18, 45.29it/s]

Ep 16000/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 5


 64%|██████▍   | 16106/25000 [06:13<03:17, 45.11it/s]

Ep 16100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 65%|██████▍   | 16206/25000 [06:16<04:15, 34.36it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 65%|██████▌   | 16307/25000 [06:18<03:04, 47.09it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 66%|██████▌   | 16407/25000 [06:20<03:02, 47.19it/s]

Ep 16400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 5


 66%|██████▌   | 16507/25000 [06:22<03:04, 46.04it/s]

Ep 16500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 66%|██████▋   | 16607/25000 [06:24<03:05, 45.21it/s]

Ep 16600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 9


 67%|██████▋   | 16707/25000 [06:27<03:12, 43.03it/s]

Ep 16700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 67%|██████▋   | 16805/25000 [06:29<04:01, 33.98it/s]

Ep 16800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 68%|██████▊   | 16909/25000 [06:32<02:55, 46.11it/s]

Ep 16900/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 0


 68%|██████▊   | 17009/25000 [06:34<02:50, 46.86it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 5


 68%|██████▊   | 17105/25000 [06:36<02:45, 47.84it/s]

Ep 17100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 5


 69%|██████▉   | 17205/25000 [06:38<02:40, 48.51it/s]

Ep 17200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 69%|██████▉   | 17305/25000 [06:40<02:47, 45.97it/s]

Ep 17300/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.6, First Action 0


 70%|██████▉   | 17405/25000 [06:43<03:32, 35.72it/s]

Ep 17400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 70%|███████   | 17507/25000 [06:46<02:44, 45.43it/s]

Ep 17500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 5


 70%|███████   | 17607/25000 [06:48<02:35, 47.66it/s]

Ep 17600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 71%|███████   | 17707/25000 [06:50<02:30, 48.37it/s]

Ep 17700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 71%|███████   | 17807/25000 [06:52<02:34, 46.47it/s]

Ep 17800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 5


 72%|███████▏  | 17907/25000 [06:54<02:31, 46.70it/s]

Ep 17900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 72%|███████▏  | 18005/25000 [06:57<03:05, 37.69it/s]

Ep 18000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 72%|███████▏  | 18106/25000 [07:00<02:34, 44.71it/s]

Ep 18100/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.3, First Action 0


 73%|███████▎  | 18206/25000 [07:02<02:25, 46.54it/s]

Ep 18200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.1, First Action 0


 73%|███████▎  | 18306/25000 [07:04<02:25, 45.93it/s]

Ep 18300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 5


 74%|███████▎  | 18406/25000 [07:06<02:21, 46.49it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.4, First Action 6


 74%|███████▍  | 18506/25000 [07:08<02:19, 46.60it/s]

Ep 18500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 74%|███████▍  | 18604/25000 [07:11<02:49, 37.73it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 75%|███████▍  | 18709/25000 [07:13<02:17, 45.73it/s]

Ep 18700/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.7, First Action 9


 75%|███████▌  | 18809/25000 [07:16<02:14, 45.97it/s]

Ep 18800/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.3, First Action 5


 76%|███████▌  | 18909/25000 [07:18<02:09, 47.01it/s]

Ep 18900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 5


 76%|███████▌  | 19009/25000 [07:20<02:04, 47.97it/s]

Ep 19000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 76%|███████▋  | 19109/25000 [07:22<02:03, 47.64it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 77%|███████▋  | 19204/25000 [07:25<02:47, 34.63it/s]

Ep 19200/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 2


 77%|███████▋  | 19307/25000 [07:27<02:07, 44.61it/s]

Ep 19300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 78%|███████▊  | 19407/25000 [07:30<02:05, 44.51it/s]

Ep 19400/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.7, First Action 0


 78%|███████▊  | 19507/25000 [07:32<01:59, 45.97it/s]

Ep 19500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 78%|███████▊  | 19607/25000 [07:34<01:57, 45.97it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.1, First Action 5


 79%|███████▉  | 19707/25000 [07:36<01:58, 44.68it/s]

Ep 19700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 5


 79%|███████▉  | 19806/25000 [07:39<02:16, 38.14it/s]

Ep 19800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 5


 80%|███████▉  | 19907/25000 [07:41<01:53, 44.79it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 5


 80%|████████  | 20007/25000 [07:44<01:49, 45.45it/s]

Ep 20000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 5


 80%|████████  | 20107/25000 [07:46<01:46, 46.01it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 0


 81%|████████  | 20207/25000 [07:48<01:40, 47.91it/s]

Ep 20200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 5


 81%|████████  | 20307/25000 [07:50<01:41, 46.40it/s]

Ep 20300/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.4, First Action 5


 82%|████████▏ | 20406/25000 [07:53<02:03, 37.06it/s]

Ep 20400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.6, First Action 5


 82%|████████▏ | 20506/25000 [07:55<01:36, 46.52it/s]

Ep 20500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 5


 82%|████████▏ | 20606/25000 [07:57<01:35, 45.91it/s]

Ep 20600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 5


 83%|████████▎ | 20706/25000 [08:00<01:34, 45.61it/s]

Ep 20700/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.5, First Action 5


 83%|████████▎ | 20806/25000 [08:02<01:31, 45.65it/s]

Ep 20800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 6


 84%|████████▎ | 20906/25000 [08:04<01:28, 46.52it/s]

Ep 20900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 84%|████████▍ | 21004/25000 [08:07<01:53, 35.25it/s]

Ep 21000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 5


 84%|████████▍ | 21105/25000 [08:10<01:53, 34.30it/s]

Ep 21100/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.7, First Action 5


 85%|████████▍ | 21208/25000 [08:13<01:32, 41.00it/s]

Ep 21200/25000, Opt. Action: 2, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.7, First Action 5


 85%|████████▌ | 21308/25000 [08:15<01:20, 45.70it/s]

Ep 21300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 5


 86%|████████▌ | 21408/25000 [08:17<01:18, 46.00it/s]

Ep 21400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 86%|████████▌ | 21508/25000 [08:19<01:13, 47.31it/s]

Ep 21500/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.0, First Action 5


 86%|████████▋ | 21607/25000 [08:21<01:29, 38.08it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 87%|████████▋ | 21703/25000 [08:24<01:38, 33.52it/s]

Ep 21700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 87%|████████▋ | 21808/25000 [08:27<01:08, 46.28it/s]

Ep 21800/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.5, First Action 5


 88%|████████▊ | 21908/25000 [08:29<01:05, 47.08it/s]

Ep 21900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 6


 88%|████████▊ | 22008/25000 [08:31<01:05, 45.71it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 88%|████████▊ | 22108/25000 [08:33<01:03, 45.81it/s]

Ep 22100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 5


 89%|████████▉ | 22206/25000 [08:35<01:14, 37.50it/s]

Ep 22200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 5


 89%|████████▉ | 22305/25000 [08:38<01:18, 34.36it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 90%|████████▉ | 22405/25000 [08:41<00:56, 45.82it/s]

Ep 22400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 90%|█████████ | 22506/25000 [08:43<00:53, 46.74it/s]

Ep 22500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 2


 90%|█████████ | 22606/25000 [08:45<00:51, 46.48it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 5


 91%|█████████ | 22706/25000 [08:47<00:48, 47.10it/s]

Ep 22700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 91%|█████████ | 22805/25000 [08:49<00:56, 38.73it/s]

Ep 22800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 92%|█████████▏| 22905/25000 [08:52<01:02, 33.61it/s]

Ep 22900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 5


 92%|█████████▏| 23005/25000 [08:54<00:42, 46.63it/s]

Ep 23000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 92%|█████████▏| 23105/25000 [08:57<00:41, 46.15it/s]

Ep 23100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 93%|█████████▎| 23205/25000 [08:59<00:38, 46.82it/s]

Ep 23200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 5


 93%|█████████▎| 23305/25000 [09:01<00:36, 45.82it/s]

Ep 23300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 94%|█████████▎| 23404/25000 [09:03<00:41, 38.66it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 94%|█████████▍| 23505/25000 [09:06<00:45, 32.93it/s]

Ep 23500/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.7, First Action 5


 94%|█████████▍| 23606/25000 [09:08<00:30, 45.60it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 5


 95%|█████████▍| 23706/25000 [09:11<00:27, 46.54it/s]

Ep 23700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 9


 95%|█████████▌| 23806/25000 [09:13<00:25, 46.62it/s]

Ep 23800/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 5


 96%|█████████▌| 23906/25000 [09:15<00:23, 46.12it/s]

Ep 23900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 96%|█████████▌| 24004/25000 [09:17<00:26, 36.95it/s]

Ep 24000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 9


 96%|█████████▋| 24104/25000 [09:20<00:26, 34.00it/s]

Ep 24100/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.0, First Action 5


 97%|█████████▋| 24209/25000 [09:22<00:17, 45.65it/s]

Ep 24200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 5


 97%|█████████▋| 24309/25000 [09:25<00:14, 46.65it/s]

Ep 24300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 5


 98%|█████████▊| 24409/25000 [09:27<00:12, 46.53it/s]

Ep 24400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 98%|█████████▊| 24509/25000 [09:29<00:10, 46.88it/s]

Ep 24500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 5


 98%|█████████▊| 24603/25000 [09:31<00:10, 36.28it/s]

Ep 24600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 99%|█████████▉| 24706/25000 [09:34<00:08, 34.48it/s]

Ep 24700/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 5


 99%|█████████▉| 24809/25000 [09:36<00:04, 45.38it/s]

Ep 24800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 5


100%|█████████▉| 24904/25000 [09:38<00:02, 43.57it/s]

Ep 24900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 5


100%|██████████| 25000/25000 [09:40<00:00, 43.03it/s]


Ep 25000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.5, First Action 5

TEST:


 42%|████▏     | 126/300 [00:00<00:01, 150.99it/s]

Ep 100/300, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.9, First Action 4


 73%|███████▎  | 219/300 [00:01<00:00, 147.73it/s]

Ep 200/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 5


100%|██████████| 300/300 [00:02<00:00, 145.69it/s]


Ep 300/300, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 5

GAMMA 0.8 - LR 0.001 - Entropy Decay True


  0%|          | 103/25000 [00:02<12:01, 34.52it/s]

Ep 100/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 17.7, First Action 10


  1%|          | 207/25000 [00:05<08:59, 45.98it/s]

Ep 200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


  1%|          | 307/25000 [00:07<08:49, 46.61it/s]

Ep 300/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 4


  2%|▏         | 407/25000 [00:09<08:58, 45.64it/s]

Ep 400/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


  2%|▏         | 507/25000 [00:11<08:47, 46.40it/s]

Ep 500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  2%|▏         | 605/25000 [00:14<11:14, 36.15it/s]

Ep 600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


  3%|▎         | 705/25000 [00:17<11:37, 34.81it/s]

Ep 700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


  3%|▎         | 808/25000 [00:19<08:52, 45.41it/s]

Ep 800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 2


  4%|▎         | 908/25000 [00:21<08:38, 46.42it/s]

Ep 900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  4%|▍         | 1008/25000 [00:23<08:50, 45.22it/s]

Ep 1000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  4%|▍         | 1108/25000 [00:25<08:29, 46.90it/s]

Ep 1100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


  5%|▍         | 1207/25000 [00:28<10:26, 37.95it/s]

Ep 1200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  5%|▌         | 1307/25000 [00:30<11:09, 35.37it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


  6%|▌         | 1408/25000 [00:33<08:30, 46.23it/s]

Ep 1400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


  6%|▌         | 1508/25000 [00:35<08:22, 46.77it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 3


  6%|▋         | 1608/25000 [00:37<08:48, 44.30it/s]

Ep 1600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  7%|▋         | 1708/25000 [00:39<08:28, 45.78it/s]

Ep 1700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


  7%|▋         | 1806/25000 [00:42<10:17, 37.55it/s]

Ep 1800/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.7, First Action 8


  8%|▊         | 1905/25000 [00:44<11:03, 34.79it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


  8%|▊         | 2009/25000 [00:47<08:29, 45.08it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  8%|▊         | 2109/25000 [00:49<08:18, 45.89it/s]

Ep 2100/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 17.2, First Action 8


  9%|▉         | 2209/25000 [00:51<08:12, 46.25it/s]

Ep 2200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


  9%|▉         | 2309/25000 [00:53<07:55, 47.73it/s]

Ep 2300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8


 10%|▉         | 2404/25000 [00:55<09:18, 40.47it/s]

Ep 2400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 10%|█         | 2506/25000 [00:58<11:00, 34.06it/s]

Ep 2500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 3


 10%|█         | 2606/25000 [01:01<08:04, 46.24it/s]

Ep 2600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 5


 11%|█         | 2706/25000 [01:03<08:08, 45.67it/s]

Ep 2700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 11%|█         | 2806/25000 [01:05<07:55, 46.63it/s]

Ep 2800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.4, First Action 3


 12%|█▏        | 2906/25000 [01:07<07:58, 46.14it/s]

Ep 2900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 3


 12%|█▏        | 3006/25000 [01:09<09:52, 37.13it/s]

Ep 3000/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.0, First Action 3


 12%|█▏        | 3104/25000 [01:12<10:46, 33.86it/s]

Ep 3100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 13%|█▎        | 3208/25000 [01:15<08:08, 44.63it/s]

Ep 3200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 3


 13%|█▎        | 3308/25000 [01:17<07:58, 45.37it/s]

Ep 3300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 3


 14%|█▎        | 3408/25000 [01:19<07:57, 45.19it/s]

Ep 3400/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.3, First Action 3


 14%|█▍        | 3508/25000 [01:21<07:32, 47.54it/s]

Ep 3500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 14%|█▍        | 3606/25000 [01:23<09:35, 37.15it/s]

Ep 3600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 3


 15%|█▍        | 3704/25000 [01:26<10:32, 33.68it/s]

Ep 3700/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 3


 15%|█▌        | 3808/25000 [01:28<07:30, 47.05it/s]

Ep 3800/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.1, First Action 3


 16%|█▌        | 3908/25000 [01:31<07:36, 46.25it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 3


 16%|█▌        | 4008/25000 [01:33<07:34, 46.17it/s]

Ep 4000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 3


 16%|█▋        | 4108/25000 [01:35<07:25, 46.94it/s]

Ep 4100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 3


 17%|█▋        | 4207/25000 [01:37<09:08, 37.94it/s]

Ep 4200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 3


 17%|█▋        | 4304/25000 [01:40<10:24, 33.15it/s]

Ep 4300/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.0, First Action 3


 18%|█▊        | 4408/25000 [01:42<07:19, 46.83it/s]

Ep 4400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 18%|█▊        | 4508/25000 [01:45<07:37, 44.78it/s]

Ep 4500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 3


 18%|█▊        | 4608/25000 [01:47<07:33, 45.00it/s]

Ep 4600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 3


 19%|█▉        | 4708/25000 [01:49<07:16, 46.47it/s]

Ep 4700/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.6, First Action 3


 19%|█▉        | 4806/25000 [01:51<09:06, 36.95it/s]

Ep 4800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 3


 20%|█▉        | 4903/25000 [01:54<09:49, 34.12it/s]

Ep 4900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 3


 20%|██        | 5008/25000 [01:56<07:01, 47.42it/s]

Ep 5000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 3


 20%|██        | 5108/25000 [01:59<07:08, 46.40it/s]

Ep 5100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 21%|██        | 5208/25000 [02:01<07:05, 46.53it/s]

Ep 5200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 3


 21%|██        | 5308/25000 [02:03<07:04, 46.35it/s]

Ep 5300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 3


 22%|██▏       | 5406/25000 [02:05<08:34, 38.12it/s]

Ep 5400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 3


 22%|██▏       | 5504/25000 [02:08<09:50, 33.00it/s]

Ep 5500/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.0, First Action 3


 22%|██▏       | 5609/25000 [02:10<06:53, 46.86it/s]

Ep 5600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.3, First Action 3


 23%|██▎       | 5709/25000 [02:13<07:01, 45.80it/s]

Ep 5700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 3


 23%|██▎       | 5809/25000 [02:15<07:05, 45.12it/s]

Ep 5800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 3


 24%|██▎       | 5909/25000 [02:17<07:09, 44.43it/s]

Ep 5900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.3, First Action 3


 24%|██▍       | 6007/25000 [02:19<08:35, 36.81it/s]

Ep 6000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 3


 24%|██▍       | 6104/25000 [02:22<08:56, 35.23it/s]

Ep 6100/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 3


 25%|██▍       | 6207/25000 [02:24<06:54, 45.32it/s]

Ep 6200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 25%|██▌       | 6307/25000 [02:27<06:45, 46.11it/s]

Ep 6300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 26%|██▌       | 6407/25000 [02:29<06:45, 45.82it/s]

Ep 6400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 26%|██▌       | 6507/25000 [02:31<06:34, 46.91it/s]

Ep 6500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 26%|██▋       | 6606/25000 [02:33<07:51, 38.98it/s]

Ep 6600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.0, First Action 3


 27%|██▋       | 6705/25000 [02:36<09:03, 33.65it/s]

Ep 6700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 3


 27%|██▋       | 6807/25000 [02:38<06:28, 46.84it/s]

Ep 6800/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 3


 28%|██▊       | 6906/25000 [02:41<08:29, 35.50it/s]

Ep 6900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 28%|██▊       | 7005/25000 [02:44<06:38, 45.16it/s]

Ep 7000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 3


 28%|██▊       | 7105/25000 [02:46<06:22, 46.79it/s]

Ep 7100/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 3


 29%|██▉       | 7205/25000 [02:48<08:47, 33.72it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 3


 29%|██▉       | 7306/25000 [02:51<06:59, 42.22it/s]

Ep 7300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.6, First Action 3


 30%|██▉       | 7406/25000 [02:53<06:15, 46.90it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 3


 30%|███       | 7506/25000 [02:55<06:17, 46.36it/s]

Ep 7500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 3


 30%|███       | 7608/25000 [02:58<06:23, 45.39it/s]

Ep 7600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 3


 31%|███       | 7708/25000 [03:00<06:14, 46.13it/s]

Ep 7700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 31%|███       | 7807/25000 [03:02<07:40, 37.34it/s]

Ep 7800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 3


 32%|███▏      | 7906/25000 [03:05<06:47, 41.92it/s]

Ep 7900/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.9, First Action 3


 32%|███▏      | 8006/25000 [03:07<06:08, 46.09it/s]

Ep 8000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 3


 32%|███▏      | 8106/25000 [03:09<06:04, 46.34it/s]

Ep 8100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.1, First Action 3


 33%|███▎      | 8206/25000 [03:12<06:08, 45.60it/s]

Ep 8200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 33%|███▎      | 8306/25000 [03:14<05:59, 46.47it/s]

Ep 8300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 3


 34%|███▎      | 8406/25000 [03:16<07:48, 35.40it/s]

Ep 8400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 3


 34%|███▍      | 8505/25000 [03:19<06:52, 39.96it/s]

Ep 8500/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 3


 34%|███▍      | 8605/25000 [03:21<05:48, 47.00it/s]

Ep 8600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.7, First Action 3


 35%|███▍      | 8705/25000 [03:23<05:52, 46.19it/s]

Ep 8700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 35%|███▌      | 8805/25000 [03:26<05:52, 45.95it/s]

Ep 8800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.3, First Action 3


 36%|███▌      | 8905/25000 [03:28<05:54, 45.40it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 36%|███▌      | 9003/25000 [03:30<07:15, 36.72it/s]

Ep 9000/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.7, First Action 3


 36%|███▋      | 9107/25000 [03:33<06:09, 43.01it/s]

Ep 9100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 37%|███▋      | 9207/25000 [03:35<05:35, 47.01it/s]

Ep 9200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 3


 37%|███▋      | 9307/25000 [03:38<05:37, 46.49it/s]

Ep 9300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 3


 38%|███▊      | 9407/25000 [03:40<05:40, 45.79it/s]

Ep 9400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 3


 38%|███▊      | 9507/25000 [03:42<05:32, 46.59it/s]

Ep 9500/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.4, First Action 3


 38%|███▊      | 9605/25000 [03:44<07:04, 36.30it/s]

Ep 9600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 39%|███▉      | 9709/25000 [03:47<05:57, 42.83it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 3


 39%|███▉      | 9809/25000 [03:49<05:41, 44.53it/s]

Ep 9800/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.7, First Action 8


 40%|███▉      | 9904/25000 [03:52<05:41, 44.18it/s]

Ep 9900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 3


 40%|████      | 10009/25000 [03:54<05:20, 46.70it/s]

Ep 10000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 40%|████      | 10109/25000 [03:56<05:15, 47.19it/s]

Ep 10100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 3


 41%|████      | 10205/25000 [03:58<06:23, 38.63it/s]

Ep 10200/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 3


 41%|████      | 10305/25000 [04:01<05:38, 43.41it/s]

Ep 10300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 3


 42%|████▏     | 10405/25000 [04:03<05:12, 46.65it/s]

Ep 10400/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 3


 42%|████▏     | 10505/25000 [04:05<05:08, 47.00it/s]

Ep 10500/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 3


 42%|████▏     | 10605/25000 [04:08<05:14, 45.79it/s]

Ep 10600/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 3


 43%|████▎     | 10705/25000 [04:10<05:07, 46.46it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 43%|████▎     | 10807/25000 [04:12<06:33, 36.05it/s]

Ep 10800/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 3


 44%|████▎     | 10907/25000 [04:15<05:14, 44.82it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 3


 44%|████▍     | 11007/25000 [04:17<05:03, 46.04it/s]

Ep 11000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 3


 44%|████▍     | 11107/25000 [04:20<05:05, 45.52it/s]

Ep 11100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 45%|████▍     | 11207/25000 [04:22<05:00, 45.85it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 45%|████▌     | 11307/25000 [04:24<04:55, 46.41it/s]

Ep 11300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 46%|████▌     | 11404/25000 [04:26<06:29, 34.91it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 46%|████▌     | 11507/25000 [04:29<04:59, 45.09it/s]

Ep 11500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 3


 46%|████▋     | 11607/25000 [04:31<04:46, 46.78it/s]

Ep 11600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.2, First Action 3


 47%|████▋     | 11707/25000 [04:34<04:47, 46.26it/s]

Ep 11700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 3


 47%|████▋     | 11807/25000 [04:36<04:44, 46.32it/s]

Ep 11800/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.5, First Action 3


 48%|████▊     | 11907/25000 [04:38<04:39, 46.86it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 11.5, First Action 3


 48%|████▊     | 12007/25000 [04:41<06:04, 35.64it/s]

Ep 12000/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.3, First Action 3


 48%|████▊     | 12108/25000 [04:43<04:41, 45.83it/s]

Ep 12100/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 3


 49%|████▉     | 12208/25000 [04:45<04:34, 46.67it/s]

Ep 12200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.6, First Action 3


 49%|████▉     | 12308/25000 [04:48<04:43, 44.84it/s]

Ep 12300/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.2, First Action 3


 50%|████▉     | 12408/25000 [04:50<04:35, 45.77it/s]

Ep 12400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.8, First Action 3


 50%|█████     | 12508/25000 [04:52<04:25, 47.03it/s]

Ep 12500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 50%|█████     | 12607/25000 [04:55<05:30, 37.52it/s]

Ep 12600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.9, First Action 3


 51%|█████     | 12709/25000 [04:57<04:31, 45.20it/s]

Ep 12700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 3


 51%|█████     | 12809/25000 [04:59<04:23, 46.27it/s]

Ep 12800/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 3


 52%|█████▏    | 12909/25000 [05:02<04:26, 45.33it/s]

Ep 12900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 3


 52%|█████▏    | 13009/25000 [05:04<04:18, 46.45it/s]

Ep 13000/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.2, First Action 3


 52%|█████▏    | 13109/25000 [05:06<04:12, 47.02it/s]

Ep 13100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 3


 53%|█████▎    | 13207/25000 [05:08<05:26, 36.08it/s]

Ep 13200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 3


 53%|█████▎    | 13305/25000 [05:11<04:22, 44.47it/s]

Ep 13300/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.1, First Action 3


 54%|█████▎    | 13405/25000 [05:13<04:15, 45.46it/s]

Ep 13400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 3


 54%|█████▍    | 13505/25000 [05:16<04:17, 44.58it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 54%|█████▍    | 13605/25000 [05:18<04:09, 45.59it/s]

Ep 13600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 3


 55%|█████▍    | 13705/25000 [05:20<04:05, 45.92it/s]

Ep 13700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 3


 55%|█████▌    | 13806/25000 [05:23<05:23, 34.61it/s]

Ep 13800/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 3


 56%|█████▌    | 13909/25000 [05:25<04:03, 45.60it/s]

Ep 13900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 56%|█████▌    | 14004/25000 [05:27<04:04, 44.91it/s]

Ep 14000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.9, First Action 3


 56%|█████▋    | 14109/25000 [05:30<03:56, 46.06it/s]

Ep 14100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 3


 57%|█████▋    | 14209/25000 [05:32<03:55, 45.88it/s]

Ep 14200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 3


 57%|█████▋    | 14309/25000 [05:34<03:53, 45.85it/s]

Ep 14300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 3


 58%|█████▊    | 14406/25000 [05:37<05:14, 33.68it/s]

Ep 14400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.9, First Action 3


 58%|█████▊    | 14508/25000 [05:39<04:01, 43.44it/s]

Ep 14500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 3


 58%|█████▊    | 14608/25000 [05:42<03:50, 45.16it/s]

Ep 14600/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 3


 59%|█████▉    | 14708/25000 [05:44<03:44, 45.89it/s]

Ep 14700/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 3


 59%|█████▉    | 14808/25000 [05:46<03:35, 47.30it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 3


 60%|█████▉    | 14908/25000 [05:48<03:35, 46.89it/s]

Ep 14900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 3


 60%|██████    | 15005/25000 [05:51<04:26, 37.44it/s]

Ep 15000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.1, First Action 3


 60%|██████    | 15106/25000 [05:53<03:43, 44.24it/s]

Ep 15100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 3


 61%|██████    | 15206/25000 [05:56<03:34, 45.71it/s]

Ep 15200/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.2, First Action 3


 61%|██████    | 15306/25000 [05:58<03:34, 45.29it/s]

Ep 15300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 3


 62%|██████▏   | 15406/25000 [06:00<03:27, 46.16it/s]

Ep 15400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 3


 62%|██████▏   | 15506/25000 [06:02<03:20, 47.38it/s]

Ep 15500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 3


 62%|██████▏   | 15604/25000 [06:05<04:16, 36.63it/s]

Ep 15600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 3


 63%|██████▎   | 15707/25000 [06:07<03:37, 42.80it/s]

Ep 15700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 3


 63%|██████▎   | 15807/25000 [06:10<03:24, 45.05it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 64%|██████▎   | 15907/25000 [06:12<03:19, 45.54it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.2, First Action 3


 64%|██████▍   | 16007/25000 [06:14<03:17, 45.59it/s]

Ep 16000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 3


 64%|██████▍   | 16107/25000 [06:16<03:11, 46.39it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 65%|██████▍   | 16205/25000 [06:18<03:48, 38.52it/s]

Ep 16200/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.0, First Action 3


 65%|██████▌   | 16305/25000 [06:21<04:05, 35.44it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 3


 66%|██████▌   | 16405/25000 [06:24<03:03, 46.87it/s]

Ep 16400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 3


 66%|██████▌   | 16505/25000 [06:26<03:02, 46.52it/s]

Ep 16500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 3


 66%|██████▋   | 16605/25000 [06:28<03:01, 46.35it/s]

Ep 16600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 3


 67%|██████▋   | 16705/25000 [06:30<02:57, 46.75it/s]

Ep 16700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 67%|██████▋   | 16807/25000 [06:32<03:42, 36.86it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.2, First Action 3


 68%|██████▊   | 16903/25000 [06:35<03:59, 33.86it/s]

Ep 16900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 68%|██████▊   | 17006/25000 [06:37<02:57, 44.99it/s]

Ep 17000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 68%|██████▊   | 17106/25000 [06:40<02:54, 45.36it/s]

Ep 17100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 3


 69%|██████▉   | 17206/25000 [06:42<02:51, 45.43it/s]

Ep 17200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 69%|██████▉   | 17306/25000 [06:44<02:46, 46.27it/s]

Ep 17300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 3


 70%|██████▉   | 17405/25000 [06:46<03:23, 37.31it/s]

Ep 17400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 3


 70%|███████   | 17505/25000 [06:49<03:58, 31.37it/s]

Ep 17500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 70%|███████   | 17609/25000 [06:52<02:44, 45.04it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 3


 71%|███████   | 17709/25000 [06:54<02:38, 45.98it/s]

Ep 17700/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 3


 71%|███████   | 17809/25000 [06:56<02:35, 46.31it/s]

Ep 17800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 3


 72%|███████▏  | 17907/25000 [06:58<03:17, 35.99it/s]

Ep 17900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 3


 72%|███████▏  | 18003/25000 [07:01<03:30, 33.19it/s]

Ep 18000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 3


 72%|███████▏  | 18103/25000 [07:04<03:26, 33.38it/s]

Ep 18100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 73%|███████▎  | 18207/25000 [07:07<02:29, 45.53it/s]

Ep 18200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 3


 73%|███████▎  | 18307/25000 [07:09<02:30, 44.61it/s]

Ep 18300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 3


 74%|███████▎  | 18407/25000 [07:11<02:24, 45.75it/s]

Ep 18400/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 3


 74%|███████▍  | 18507/25000 [07:13<02:19, 46.47it/s]

Ep 18500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.6, First Action 3


 74%|███████▍  | 18606/25000 [07:15<02:46, 38.40it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 75%|███████▍  | 18704/25000 [07:18<03:02, 34.49it/s]

Ep 18700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 75%|███████▌  | 18806/25000 [07:21<02:16, 45.46it/s]

Ep 18800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 3


 76%|███████▌  | 18906/25000 [07:23<02:12, 46.13it/s]

Ep 18900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 76%|███████▌  | 19006/25000 [07:25<02:08, 46.50it/s]

Ep 19000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 3


 76%|███████▋  | 19106/25000 [07:27<02:03, 47.86it/s]

Ep 19100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 3


 77%|███████▋  | 19207/25000 [07:29<02:33, 37.71it/s]

Ep 19200/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.3, First Action 3


 77%|███████▋  | 19306/25000 [07:32<02:43, 34.88it/s]

Ep 19300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 78%|███████▊  | 19407/25000 [07:34<02:01, 46.21it/s]

Ep 19400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 78%|███████▊  | 19507/25000 [07:37<02:00, 45.66it/s]

Ep 19500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 3


 78%|███████▊  | 19607/25000 [07:39<02:03, 43.74it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 79%|███████▉  | 19707/25000 [07:41<01:56, 45.39it/s]

Ep 19700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 79%|███████▉  | 19804/25000 [07:43<02:24, 35.88it/s]

Ep 19800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 3


 80%|███████▉  | 19904/25000 [07:46<02:31, 33.73it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 3


 80%|████████  | 20009/25000 [07:49<01:47, 46.25it/s]

Ep 20000/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.8, First Action 3


 80%|████████  | 20109/25000 [07:51<01:47, 45.57it/s]

Ep 20100/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.7, First Action 3


 81%|████████  | 20209/25000 [07:53<01:43, 46.25it/s]

Ep 20200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 81%|████████  | 20309/25000 [07:55<01:40, 46.64it/s]

Ep 20300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 3


 82%|████████▏ | 20406/25000 [07:57<01:58, 38.80it/s]

Ep 20400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 82%|████████▏ | 20506/25000 [08:00<02:14, 33.49it/s]

Ep 20500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 3


 82%|████████▏ | 20607/25000 [08:03<01:35, 45.84it/s]

Ep 20600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 3


 83%|████████▎ | 20707/25000 [08:05<01:35, 44.92it/s]

Ep 20700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 3


 83%|████████▎ | 20807/25000 [08:07<01:35, 44.00it/s]

Ep 20800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 3


 84%|████████▎ | 20907/25000 [08:09<01:29, 45.74it/s]

Ep 20900/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 3


 84%|████████▍ | 21007/25000 [08:12<01:47, 37.00it/s]

Ep 21000/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.1, First Action 3


 84%|████████▍ | 21104/25000 [08:14<01:57, 33.30it/s]

Ep 21100/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.8, First Action 3


 85%|████████▍ | 21208/25000 [08:17<01:23, 45.29it/s]

Ep 21200/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.0, First Action 3


 85%|████████▌ | 21308/25000 [08:19<01:19, 46.30it/s]

Ep 21300/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 3


 86%|████████▌ | 21408/25000 [08:21<01:15, 47.50it/s]

Ep 21400/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.7, First Action 3


 86%|████████▌ | 21508/25000 [08:23<01:14, 47.03it/s]

Ep 21500/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.6, First Action 3


 86%|████████▋ | 21607/25000 [08:26<01:34, 35.87it/s]

Ep 21600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 3


 87%|████████▋ | 21705/25000 [08:28<01:31, 36.18it/s]

Ep 21700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 3


 87%|████████▋ | 21807/25000 [08:31<01:10, 45.23it/s]

Ep 21800/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 3


 88%|████████▊ | 21907/25000 [08:33<01:06, 46.58it/s]

Ep 21900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 3


 88%|████████▊ | 22007/25000 [08:35<01:05, 45.67it/s]

Ep 22000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 3


 88%|████████▊ | 22107/25000 [08:37<01:03, 45.60it/s]

Ep 22100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 3


 89%|████████▉ | 22207/25000 [08:40<01:17, 36.07it/s]

Ep 22200/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.4, First Action 3


 89%|████████▉ | 22304/25000 [08:42<01:20, 33.53it/s]

Ep 22300/25000, Opt. Action: 3, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 3


 90%|████████▉ | 22406/25000 [08:45<00:56, 45.68it/s]

Ep 22400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.0, First Action 3


 90%|█████████ | 22506/25000 [08:47<00:52, 47.14it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 3


 90%|█████████ | 22606/25000 [08:49<00:51, 46.69it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 91%|█████████ | 22706/25000 [08:51<00:49, 46.26it/s]

Ep 22700/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 3


 91%|█████████ | 22805/25000 [08:54<01:01, 35.57it/s]

Ep 22800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 92%|█████████▏| 22905/25000 [08:57<01:02, 33.26it/s]

Ep 22900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 3


 92%|█████████▏| 23009/25000 [08:59<00:44, 45.13it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.8, First Action 3


 92%|█████████▏| 23109/25000 [09:01<00:40, 47.19it/s]

Ep 23100/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.4, First Action 3


 93%|█████████▎| 23209/25000 [09:03<00:38, 46.91it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 3


 93%|█████████▎| 23309/25000 [09:05<00:36, 46.90it/s]

Ep 23300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 94%|█████████▎| 23407/25000 [09:08<00:44, 35.87it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.5, First Action 3


 94%|█████████▍| 23504/25000 [09:11<00:45, 32.88it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 3


 94%|█████████▍| 23607/25000 [09:13<00:30, 45.88it/s]

Ep 23600/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 3


 95%|█████████▍| 23707/25000 [09:15<00:27, 46.94it/s]

Ep 23700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 95%|█████████▌| 23807/25000 [09:17<00:25, 46.39it/s]

Ep 23800/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.1, First Action 3


 96%|█████████▌| 23907/25000 [09:19<00:23, 46.76it/s]

Ep 23900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 3


 96%|█████████▌| 24005/25000 [09:22<00:27, 36.03it/s]

Ep 24000/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 3


 96%|█████████▋| 24106/25000 [09:25<00:28, 31.58it/s]

Ep 24100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 97%|█████████▋| 24205/25000 [09:27<00:17, 46.31it/s]

Ep 24200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 3


 97%|█████████▋| 24305/25000 [09:29<00:14, 46.94it/s]

Ep 24300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 3


 98%|█████████▊| 24405/25000 [09:31<00:12, 47.45it/s]

Ep 24400/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 3


 98%|█████████▊| 24505/25000 [09:33<00:10, 47.51it/s]

Ep 24500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


 98%|█████████▊| 24605/25000 [09:36<00:10, 36.51it/s]

Ep 24600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 3


 99%|█████████▉| 24706/25000 [09:39<00:09, 32.60it/s]

Ep 24700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 3


 99%|█████████▉| 24805/25000 [09:41<00:04, 46.35it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.1, First Action 3


100%|█████████▉| 24905/25000 [09:43<00:02, 47.33it/s]

Ep 24900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 3


100%|██████████| 25000/25000 [09:45<00:00, 42.69it/s]


Ep 25000/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 3

TEST:


 41%|████      | 123/300 [00:00<00:01, 149.56it/s]

Ep 100/300, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 3


 72%|███████▏  | 217/300 [00:01<00:00, 148.59it/s]

Ep 200/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 3


100%|██████████| 300/300 [00:02<00:00, 144.30it/s]


Ep 300/300, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 3

GAMMA 0.9 - LR 0.001 - Entropy Decay True


  0%|          | 107/25000 [00:02<10:57, 37.89it/s]

Ep 100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


  1%|          | 204/25000 [00:05<12:25, 33.24it/s]

Ep 200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


  1%|          | 309/25000 [00:07<08:55, 46.08it/s]

Ep 300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


  2%|▏         | 409/25000 [00:09<09:00, 45.50it/s]

Ep 400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  2%|▏         | 509/25000 [00:12<08:46, 46.54it/s]

Ep 500/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.7, First Action 1


  2%|▏         | 609/25000 [00:14<08:43, 46.57it/s]

Ep 600/25000, Opt. Action: 2, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 2


  3%|▎         | 708/25000 [00:16<10:21, 39.07it/s]

Ep 700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


  3%|▎         | 804/25000 [00:19<12:07, 33.26it/s]

Ep 800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  4%|▎         | 908/25000 [00:21<08:44, 45.90it/s]

Ep 900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


  4%|▍         | 1008/25000 [00:24<08:55, 44.77it/s]

Ep 1000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  4%|▍         | 1108/25000 [00:26<08:31, 46.67it/s]

Ep 1100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


  5%|▍         | 1208/25000 [00:28<08:30, 46.59it/s]

Ep 1200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  5%|▌         | 1306/25000 [00:31<11:22, 34.69it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 1


  6%|▌         | 1403/25000 [00:34<13:04, 30.07it/s]

Ep 1400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


  6%|▌         | 1505/25000 [00:36<09:27, 41.37it/s]

Ep 1500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


  6%|▋         | 1605/25000 [00:39<09:25, 41.38it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  7%|▋         | 1705/25000 [00:41<09:22, 41.43it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 1


  7%|▋         | 1805/25000 [00:43<09:16, 41.68it/s]

Ep 1800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


  8%|▊         | 1905/25000 [00:46<10:40, 36.06it/s]

Ep 1900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


  8%|▊         | 2004/25000 [00:49<09:01, 42.49it/s]

Ep 2000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


  8%|▊         | 2104/25000 [00:51<09:14, 41.27it/s]

Ep 2100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  9%|▉         | 2204/25000 [00:54<09:23, 40.42it/s]

Ep 2200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


  9%|▉         | 2304/25000 [00:56<08:38, 43.74it/s]

Ep 2300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 7


 10%|▉         | 2404/25000 [00:58<09:10, 41.02it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 10%|█         | 2506/25000 [01:01<11:31, 32.52it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 2


 10%|█         | 2608/25000 [01:04<09:00, 41.41it/s]

Ep 2600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 7


 11%|█         | 2708/25000 [01:06<08:14, 45.12it/s]

Ep 2700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 11%|█         | 2808/25000 [01:08<07:56, 46.58it/s]

Ep 2800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 12%|█▏        | 2908/25000 [01:11<08:09, 45.18it/s]

Ep 2900/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 17.6, First Action 1


 12%|█▏        | 3007/25000 [01:13<09:31, 38.46it/s]

Ep 3000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 2


 12%|█▏        | 3105/25000 [01:16<10:49, 33.73it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 7


 13%|█▎        | 3207/25000 [01:18<07:38, 47.49it/s]

Ep 3200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 7


 13%|█▎        | 3307/25000 [01:20<07:40, 47.06it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 7


 14%|█▎        | 3407/25000 [01:22<07:48, 46.13it/s]

Ep 3400/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 16.2, First Action 10


 14%|█▍        | 3507/25000 [01:25<08:03, 44.45it/s]

Ep 3500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 14%|█▍        | 3604/25000 [01:27<10:25, 34.20it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


 15%|█▍        | 3705/25000 [01:30<10:06, 35.13it/s]

Ep 3700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.4, First Action 3


 15%|█▌        | 3804/25000 [01:33<10:06, 34.95it/s]

Ep 3800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 7


 16%|█▌        | 3905/25000 [01:35<07:37, 46.08it/s]

Ep 3900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 7


 16%|█▌        | 4005/25000 [01:37<07:42, 45.38it/s]

Ep 4000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 7


 16%|█▋        | 4105/25000 [01:40<07:38, 45.54it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 7


 17%|█▋        | 4205/25000 [01:42<07:40, 45.19it/s]

Ep 4200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 6


 17%|█▋        | 4305/25000 [01:44<07:14, 47.65it/s]

Ep 4300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 18%|█▊        | 4408/25000 [01:47<08:54, 38.51it/s]

Ep 4400/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.3, First Action 1


 18%|█▊        | 4509/25000 [01:49<07:42, 44.33it/s]

Ep 4500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.3, First Action 7


 18%|█▊        | 4609/25000 [01:52<07:27, 45.60it/s]

Ep 4600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 7


 19%|█▉        | 4709/25000 [01:54<07:20, 46.06it/s]

Ep 4700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 7


 19%|█▉        | 4809/25000 [01:56<07:17, 46.10it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 7


 20%|█▉        | 4909/25000 [01:58<07:11, 46.60it/s]

Ep 4900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.3, First Action 7


 20%|██        | 5007/25000 [02:01<08:58, 37.13it/s]

Ep 5000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 7


 20%|██        | 5107/25000 [02:03<07:11, 46.15it/s]

Ep 5100/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 16.0, First Action 8


 21%|██        | 5207/25000 [02:06<07:16, 45.30it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.3, First Action 7


 21%|██        | 5307/25000 [02:08<07:03, 46.54it/s]

Ep 5300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 7


 22%|██▏       | 5407/25000 [02:10<07:00, 46.64it/s]

Ep 5400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.1, First Action 7


 22%|██▏       | 5507/25000 [02:12<06:53, 47.13it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 7


 22%|██▏       | 5605/25000 [02:15<08:30, 38.02it/s]

Ep 5600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.2, First Action 7


 23%|██▎       | 5708/25000 [02:17<07:29, 42.90it/s]

Ep 5700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 7


 23%|██▎       | 5808/25000 [02:20<07:00, 45.68it/s]

Ep 5800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 7


 24%|██▎       | 5908/25000 [02:22<06:53, 46.19it/s]

Ep 5900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 7


 24%|██▍       | 6008/25000 [02:24<06:43, 47.08it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.6, First Action 7


 24%|██▍       | 6108/25000 [02:26<06:50, 46.04it/s]

Ep 6100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.3, First Action 7


 25%|██▍       | 6205/25000 [02:29<08:34, 36.50it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 7


 25%|██▌       | 6308/25000 [02:32<07:05, 43.90it/s]

Ep 6300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 7


 26%|██▌       | 6408/25000 [02:34<06:46, 45.75it/s]

Ep 6400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 7


 26%|██▌       | 6508/25000 [02:36<06:37, 46.55it/s]

Ep 6500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.7, First Action 7


 26%|██▋       | 6608/25000 [02:38<06:41, 45.76it/s]

Ep 6600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 0


 27%|██▋       | 6708/25000 [02:40<06:35, 46.26it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 7


 27%|██▋       | 6804/25000 [02:43<08:18, 36.48it/s]

Ep 6800/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 7


 28%|██▊       | 6907/25000 [02:46<06:39, 45.34it/s]

Ep 6900/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.8, First Action 7


 28%|██▊       | 7007/25000 [02:48<06:22, 47.09it/s]

Ep 7000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.5, First Action 7


 28%|██▊       | 7107/25000 [02:50<06:22, 46.79it/s]

Ep 7100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 7


 29%|██▉       | 7207/25000 [02:52<06:18, 46.95it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 29%|██▉       | 7307/25000 [02:54<06:38, 44.45it/s]

Ep 7300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 5


 30%|██▉       | 7406/25000 [02:57<08:05, 36.25it/s]

Ep 7400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 7


 30%|███       | 7505/25000 [03:00<06:33, 44.50it/s]

Ep 7500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 7


 30%|███       | 7605/25000 [03:02<06:15, 46.26it/s]

Ep 7600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 7


 31%|███       | 7705/25000 [03:04<06:13, 46.36it/s]

Ep 7700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.9, First Action 7


 31%|███       | 7805/25000 [03:06<06:16, 45.73it/s]

Ep 7800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


 32%|███▏      | 7905/25000 [03:08<06:15, 45.56it/s]

Ep 7900/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.4, First Action 7


 32%|███▏      | 8007/25000 [03:11<07:43, 36.62it/s]

Ep 8000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 32%|███▏      | 8108/25000 [03:14<06:03, 46.53it/s]

Ep 8100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.1, First Action 7


 33%|███▎      | 8208/25000 [03:16<05:52, 47.66it/s]

Ep 8200/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.1, First Action 7


 33%|███▎      | 8308/25000 [03:18<05:55, 47.01it/s]

Ep 8300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 34%|███▎      | 8408/25000 [03:20<05:55, 46.71it/s]

Ep 8400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 7


 34%|███▍      | 8508/25000 [03:22<06:07, 44.84it/s]

Ep 8500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 34%|███▍      | 8606/25000 [03:25<07:21, 37.11it/s]

Ep 8600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 7


 35%|███▍      | 8706/25000 [03:28<06:05, 44.54it/s]

Ep 8700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 35%|███▌      | 8806/25000 [03:30<05:49, 46.31it/s]

Ep 8800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 7


 36%|███▌      | 8906/25000 [03:32<05:56, 45.14it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 7


 36%|███▌      | 9006/25000 [03:34<05:53, 45.29it/s]

Ep 9000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.9, First Action 7


 36%|███▋      | 9106/25000 [03:37<05:50, 45.37it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.1, First Action 7


 37%|███▋      | 9207/25000 [03:39<07:18, 36.01it/s]

Ep 9200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 7


 37%|███▋      | 9309/25000 [03:42<05:50, 44.79it/s]

Ep 9300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


 38%|███▊      | 9409/25000 [03:44<05:34, 46.61it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.2, First Action 7


 38%|███▊      | 9509/25000 [03:46<05:40, 45.51it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 7


 38%|███▊      | 9609/25000 [03:49<05:30, 46.54it/s]

Ep 9600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 7


 39%|███▉      | 9709/25000 [03:51<05:42, 44.62it/s]

Ep 9700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.4, First Action 7


 39%|███▉      | 9807/25000 [03:53<06:55, 36.56it/s]

Ep 9800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 7


 40%|███▉      | 9909/25000 [03:56<05:40, 44.26it/s]

Ep 9900/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 7


 40%|████      | 10009/25000 [03:58<05:23, 46.30it/s]

Ep 10000/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.8, First Action 7


 40%|████      | 10109/25000 [04:00<05:30, 45.12it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.0, First Action 7


 41%|████      | 10209/25000 [04:03<05:20, 46.22it/s]

Ep 10200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 41%|████      | 10309/25000 [04:05<05:28, 44.69it/s]

Ep 10300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 7


 42%|████▏     | 10403/25000 [04:07<06:24, 37.98it/s]

Ep 10400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 42%|████▏     | 10506/25000 [04:10<05:18, 45.57it/s]

Ep 10500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 7


 42%|████▏     | 10606/25000 [04:12<05:13, 45.97it/s]

Ep 10600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 7


 43%|████▎     | 10706/25000 [04:14<05:16, 45.23it/s]

Ep 10700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 7


 43%|████▎     | 10806/25000 [04:17<05:15, 45.02it/s]

Ep 10800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 44%|████▎     | 10906/25000 [04:19<05:18, 44.25it/s]

Ep 10900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 44%|████▍     | 11004/25000 [04:22<06:44, 34.60it/s]

Ep 11000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 7


 44%|████▍     | 11109/25000 [04:24<05:09, 44.95it/s]

Ep 11100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.0, First Action 7


 45%|████▍     | 11209/25000 [04:27<04:58, 46.14it/s]

Ep 11200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 7


 45%|████▌     | 11309/25000 [04:29<04:53, 46.70it/s]

Ep 11300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.5, First Action 7


 46%|████▌     | 11409/25000 [04:31<04:52, 46.46it/s]

Ep 11400/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.4, First Action 7


 46%|████▌     | 11509/25000 [04:33<04:48, 46.76it/s]

Ep 11500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 46%|████▋     | 11607/25000 [04:36<06:03, 36.84it/s]

Ep 11600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 7


 47%|████▋     | 11707/25000 [04:38<04:58, 44.60it/s]

Ep 11700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 7


 47%|████▋     | 11807/25000 [04:41<04:59, 44.06it/s]

Ep 11800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 7


 48%|████▊     | 11907/25000 [04:43<04:44, 45.95it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 48%|████▊     | 12007/25000 [04:45<04:40, 46.33it/s]

Ep 12000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 7


 48%|████▊     | 12107/25000 [04:47<04:40, 45.95it/s]

Ep 12100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 7


 49%|████▉     | 12206/25000 [04:50<05:46, 36.89it/s]

Ep 12200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 7


 49%|████▉     | 12309/25000 [04:53<04:42, 44.95it/s]

Ep 12300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 7


 50%|████▉     | 12409/25000 [04:55<04:33, 46.10it/s]

Ep 12400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 7


 50%|█████     | 12509/25000 [04:57<04:34, 45.43it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 7


 50%|█████     | 12609/25000 [04:59<04:26, 46.45it/s]

Ep 12600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 51%|█████     | 12709/25000 [05:01<04:27, 46.01it/s]

Ep 12700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 7


 51%|█████     | 12804/25000 [05:04<05:45, 35.29it/s]

Ep 12800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 52%|█████▏    | 12909/25000 [05:07<04:38, 43.47it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 7


 52%|█████▏    | 13009/25000 [05:09<04:22, 45.73it/s]

Ep 13000/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.5, First Action 7


 52%|█████▏    | 13109/25000 [05:11<04:17, 46.24it/s]

Ep 13100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.2, First Action 7


 53%|█████▎    | 13209/25000 [05:13<04:15, 46.12it/s]

Ep 13200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 7


 53%|█████▎    | 13309/25000 [05:15<04:16, 45.60it/s]

Ep 13300/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 12.7, First Action 7


 54%|█████▎    | 13404/25000 [05:18<05:15, 36.73it/s]

Ep 13400/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.4, First Action 7


 54%|█████▍    | 13509/25000 [05:21<04:18, 44.44it/s]

Ep 13500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.4, First Action 7


 54%|█████▍    | 13609/25000 [05:23<04:01, 47.15it/s]

Ep 13600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 7


 55%|█████▍    | 13709/25000 [05:25<04:08, 45.52it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 7


 55%|█████▌    | 13804/25000 [05:27<04:05, 45.61it/s]

Ep 13800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 56%|█████▌    | 13909/25000 [05:30<03:59, 46.33it/s]

Ep 13900/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.8, First Action 7


 56%|█████▌    | 14004/25000 [05:32<04:55, 37.26it/s]

Ep 14000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 7


 56%|█████▋    | 14108/25000 [05:35<04:25, 40.99it/s]

Ep 14100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 57%|█████▋    | 14208/25000 [05:37<03:58, 45.28it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 7


 57%|█████▋    | 14308/25000 [05:39<03:58, 44.75it/s]

Ep 14300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 7


 58%|█████▊    | 14406/25000 [05:42<04:47, 36.83it/s]

Ep 14400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 7


 58%|█████▊    | 14507/25000 [05:45<04:20, 40.21it/s]

Ep 14500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 7


 58%|█████▊    | 14606/25000 [05:47<05:17, 32.71it/s]

Ep 14600/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.9, First Action 7


 59%|█████▉    | 14705/25000 [05:50<03:44, 45.93it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.2, First Action 7


 59%|█████▉    | 14805/25000 [05:52<03:40, 46.24it/s]

Ep 14800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 60%|█████▉    | 14905/25000 [05:54<03:45, 44.68it/s]

Ep 14900/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 7


 60%|██████    | 15005/25000 [05:57<03:41, 45.05it/s]

Ep 15000/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.7, First Action 7


 60%|██████    | 15105/25000 [05:59<03:46, 43.63it/s]

Ep 15100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 7


 61%|██████    | 15204/25000 [06:02<04:55, 33.17it/s]

Ep 15200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 61%|██████    | 15309/25000 [06:04<03:32, 45.54it/s]

Ep 15300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.4, First Action 7


 62%|██████▏   | 15409/25000 [06:06<03:31, 45.41it/s]

Ep 15400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.9, First Action 7


 62%|██████▏   | 15509/25000 [06:09<03:26, 45.91it/s]

Ep 15500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 7


 62%|██████▏   | 15609/25000 [06:11<03:21, 46.66it/s]

Ep 15600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 63%|██████▎   | 15704/25000 [06:13<03:45, 41.20it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 7


 63%|██████▎   | 15803/25000 [06:16<04:38, 33.01it/s]

Ep 15800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 7


 64%|██████▎   | 15909/25000 [06:18<03:19, 45.50it/s]

Ep 15900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 7


 64%|██████▍   | 16009/25000 [06:21<03:19, 45.02it/s]

Ep 16000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 7


 64%|██████▍   | 16109/25000 [06:23<03:15, 45.47it/s]

Ep 16100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 7


 65%|██████▍   | 16209/25000 [06:25<03:10, 46.16it/s]

Ep 16200/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 7


 65%|██████▌   | 16304/25000 [06:27<03:37, 39.99it/s]

Ep 16300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 7


 66%|██████▌   | 16404/25000 [06:30<04:19, 33.14it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 7


 66%|██████▌   | 16505/25000 [06:32<03:10, 44.52it/s]

Ep 16500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 7


 66%|██████▋   | 16605/25000 [06:35<03:04, 45.45it/s]

Ep 16600/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 7


 67%|██████▋   | 16705/25000 [06:37<02:59, 46.10it/s]

Ep 16700/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.7, First Action 7


 67%|██████▋   | 16805/25000 [06:39<02:57, 46.21it/s]

Ep 16800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.6, First Action 7


 68%|██████▊   | 16905/25000 [06:41<03:26, 39.23it/s]

Ep 16900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 7


 68%|██████▊   | 17006/25000 [06:44<03:50, 34.64it/s]

Ep 17000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.0, First Action 7


 68%|██████▊   | 17108/25000 [06:47<02:59, 43.96it/s]

Ep 17100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 7


 69%|██████▉   | 17208/25000 [06:49<02:50, 45.79it/s]

Ep 17200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 7


 69%|██████▉   | 17308/25000 [06:51<02:46, 46.10it/s]

Ep 17300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.5, First Action 7


 70%|██████▉   | 17408/25000 [06:53<02:47, 45.24it/s]

Ep 17400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 7


 70%|███████   | 17503/25000 [06:55<03:08, 39.85it/s]

Ep 17500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 7


 70%|███████   | 17605/25000 [06:58<03:40, 33.51it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 71%|███████   | 17706/25000 [07:01<02:36, 46.57it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 71%|███████   | 17806/25000 [07:03<02:35, 46.34it/s]

Ep 17800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 7


 72%|███████▏  | 17906/25000 [07:05<02:33, 46.31it/s]

Ep 17900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 7


 72%|███████▏  | 18006/25000 [07:07<02:32, 45.72it/s]

Ep 18000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.3, First Action 7


 72%|███████▏  | 18106/25000 [07:10<02:57, 38.85it/s]

Ep 18100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 7


 73%|███████▎  | 18204/25000 [07:12<03:08, 36.02it/s]

Ep 18200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 7


 73%|███████▎  | 18307/25000 [07:15<02:26, 45.56it/s]

Ep 18300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.1, First Action 7


 74%|███████▎  | 18407/25000 [07:17<02:23, 45.94it/s]

Ep 18400/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 7


 74%|███████▍  | 18507/25000 [07:19<02:21, 46.04it/s]

Ep 18500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 7


 74%|███████▍  | 18607/25000 [07:21<02:18, 46.12it/s]

Ep 18600/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 7


 75%|███████▍  | 18707/25000 [07:24<02:19, 45.15it/s]

Ep 18700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 75%|███████▌  | 18807/25000 [07:26<02:53, 35.73it/s]

Ep 18800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 76%|███████▌  | 18907/25000 [07:29<02:09, 46.89it/s]

Ep 18900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 76%|███████▌  | 19007/25000 [07:31<02:10, 45.75it/s]

Ep 19000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 7


 76%|███████▋  | 19107/25000 [07:33<02:09, 45.43it/s]

Ep 19100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 7


 77%|███████▋  | 19207/25000 [07:35<02:09, 44.78it/s]

Ep 19200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 77%|███████▋  | 19307/25000 [07:38<02:17, 41.44it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.7, First Action 7


 78%|███████▊  | 19405/25000 [07:40<02:50, 32.86it/s]

Ep 19400/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.6, First Action 7


 78%|███████▊  | 19508/25000 [07:43<01:58, 46.46it/s]

Ep 19500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 78%|███████▊  | 19608/25000 [07:45<01:53, 47.54it/s]

Ep 19600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 7


 79%|███████▉  | 19708/25000 [07:47<01:57, 44.94it/s]

Ep 19700/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 7


 79%|███████▉  | 19808/25000 [07:50<01:52, 46.14it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 80%|███████▉  | 19908/25000 [07:52<01:53, 44.93it/s]

Ep 19900/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 7


 80%|████████  | 20003/25000 [07:54<02:19, 35.86it/s]

Ep 20000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 7


 80%|████████  | 20105/25000 [07:57<01:48, 45.02it/s]

Ep 20100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 81%|████████  | 20205/25000 [07:59<01:44, 46.07it/s]

Ep 20200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 81%|████████  | 20305/25000 [08:01<01:43, 45.38it/s]

Ep 20300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 7


 82%|████████▏ | 20405/25000 [08:04<01:40, 45.72it/s]

Ep 20400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 7


 82%|████████▏ | 20505/25000 [08:06<01:37, 46.13it/s]

Ep 20500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 7


 82%|████████▏ | 20605/25000 [08:09<02:01, 36.11it/s]

Ep 20600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 7


 83%|████████▎ | 20709/25000 [08:11<01:33, 46.01it/s]

Ep 20700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 83%|████████▎ | 20804/25000 [08:13<01:31, 45.92it/s]

Ep 20800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 7


 84%|████████▎ | 20909/25000 [08:16<01:30, 45.17it/s]

Ep 20900/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 7


 84%|████████▍ | 21009/25000 [08:18<01:26, 46.37it/s]

Ep 21000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 7


 84%|████████▍ | 21109/25000 [08:20<01:25, 45.33it/s]

Ep 21100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 85%|████████▍ | 21207/25000 [08:23<01:42, 37.02it/s]

Ep 21200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 7


 85%|████████▌ | 21309/25000 [08:25<01:26, 42.66it/s]

Ep 21300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 7


 86%|████████▌ | 21409/25000 [08:28<01:18, 45.65it/s]

Ep 21400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 7


 86%|████████▌ | 21509/25000 [08:30<01:15, 46.09it/s]

Ep 21500/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 7


 86%|████████▋ | 21609/25000 [08:32<01:13, 46.18it/s]

Ep 21600/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.9, First Action 7


 87%|████████▋ | 21709/25000 [08:34<01:09, 47.38it/s]

Ep 21700/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.0, First Action 7


 87%|████████▋ | 21805/25000 [08:37<01:27, 36.35it/s]

Ep 21800/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 7


 88%|████████▊ | 21907/25000 [08:39<01:11, 43.03it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 7


 88%|████████▊ | 22007/25000 [08:42<01:06, 45.13it/s]

Ep 22000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 7


 88%|████████▊ | 22107/25000 [08:44<01:04, 44.68it/s]

Ep 22100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 89%|████████▉ | 22207/25000 [08:46<01:00, 46.07it/s]

Ep 22200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 7


 89%|████████▉ | 22307/25000 [08:48<00:58, 46.36it/s]

Ep 22300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.3, First Action 7


 90%|████████▉ | 22404/25000 [08:51<01:09, 37.14it/s]

Ep 22400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 7


 90%|█████████ | 22508/25000 [08:54<00:57, 43.21it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 7


 90%|█████████ | 22608/25000 [08:56<00:51, 46.32it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.8, First Action 7


 91%|█████████ | 22708/25000 [08:58<00:49, 46.52it/s]

Ep 22700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 91%|█████████ | 22808/25000 [09:00<00:47, 46.22it/s]

Ep 22800/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.5, First Action 7


 92%|█████████▏| 22908/25000 [09:02<00:45, 46.42it/s]

Ep 22900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 7


 92%|█████████▏| 23006/25000 [09:05<00:53, 36.94it/s]

Ep 23000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 7


 92%|█████████▏| 23105/25000 [09:08<00:45, 41.71it/s]

Ep 23100/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 7


 93%|█████████▎| 23205/25000 [09:10<00:40, 44.81it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 7


 93%|█████████▎| 23305/25000 [09:12<00:37, 45.20it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 7


 94%|█████████▎| 23405/25000 [09:14<00:34, 46.41it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 7


 94%|█████████▍| 23505/25000 [09:16<00:33, 45.05it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 7


 94%|█████████▍| 23606/25000 [09:19<00:37, 37.44it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.6, First Action 7


 95%|█████████▍| 23708/25000 [09:22<00:31, 40.83it/s]

Ep 23700/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 14.8, First Action 10


 95%|█████████▌| 23808/25000 [09:24<00:26, 45.71it/s]

Ep 23800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 7


 96%|█████████▌| 23908/25000 [09:26<00:24, 45.10it/s]

Ep 23900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 7


 96%|█████████▌| 24008/25000 [09:29<00:23, 42.69it/s]

Ep 24000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.4, First Action 7


 96%|█████████▋| 24108/25000 [09:31<00:19, 45.70it/s]

Ep 24100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 7


 97%|█████████▋| 24206/25000 [09:33<00:22, 35.34it/s]

Ep 24200/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.2, First Action 7


 97%|█████████▋| 24308/25000 [09:36<00:15, 43.44it/s]

Ep 24300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 7


 98%|█████████▊| 24408/25000 [09:38<00:13, 44.90it/s]

Ep 24400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 7


 98%|█████████▊| 24508/25000 [09:41<00:10, 45.57it/s]

Ep 24500/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.9, First Action 7


 98%|█████████▊| 24608/25000 [09:43<00:08, 45.99it/s]

Ep 24600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 7


 99%|█████████▉| 24708/25000 [09:45<00:06, 47.21it/s]

Ep 24700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.4, First Action 7


 99%|█████████▉| 24804/25000 [09:47<00:05, 35.91it/s]

Ep 24800/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.7, First Action 7


100%|█████████▉| 24906/25000 [09:50<00:02, 44.76it/s]

Ep 24900/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 7


100%|██████████| 25000/25000 [09:52<00:00, 42.18it/s]


Ep 25000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 7

TEST:


 39%|███▊      | 116/300 [00:00<00:01, 118.93it/s]

Ep 100/300, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.7, First Action 7


 73%|███████▎  | 218/300 [00:01<00:00, 116.27it/s]

Ep 200/300, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.5, First Action 7


100%|██████████| 300/300 [00:02<00:00, 120.62it/s]


Ep 300/300, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.3, First Action 7

GAMMA 0.8 - LR 0.0001 - Entropy Decay False


  0%|          | 105/25000 [00:02<11:21, 36.51it/s]

Ep 100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


  1%|          | 205/25000 [00:05<10:01, 41.21it/s]

Ep 200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


  1%|          | 305/25000 [00:07<08:57, 45.98it/s]

Ep 300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


  2%|▏         | 405/25000 [00:09<08:53, 46.09it/s]

Ep 400/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 17.9, First Action 4


  2%|▏         | 505/25000 [00:12<08:51, 46.11it/s]

Ep 500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


  2%|▏         | 605/25000 [00:14<08:48, 46.20it/s]

Ep 600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


  3%|▎         | 703/25000 [00:16<11:09, 36.30it/s]

Ep 700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


  3%|▎         | 806/25000 [00:19<09:42, 41.57it/s]

Ep 800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


  4%|▎         | 906/25000 [00:21<08:55, 45.00it/s]

Ep 900/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 17.6, First Action 6


  4%|▍         | 1006/25000 [00:24<08:51, 45.16it/s]

Ep 1000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  4%|▍         | 1106/25000 [00:26<08:54, 44.72it/s]

Ep 1100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


  5%|▍         | 1206/25000 [00:28<08:36, 46.11it/s]

Ep 1200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  5%|▌         | 1305/25000 [00:31<10:28, 37.68it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  6%|▌         | 1408/25000 [00:33<10:16, 38.27it/s]

Ep 1400/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 6


  6%|▌         | 1507/25000 [00:36<08:43, 44.84it/s]

Ep 1500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


  6%|▋         | 1607/25000 [00:38<08:34, 45.43it/s]

Ep 1600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


  7%|▋         | 1707/25000 [00:40<08:16, 46.95it/s]

Ep 1700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  7%|▋         | 1807/25000 [00:42<08:12, 47.06it/s]

Ep 1800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  8%|▊         | 1907/25000 [00:45<10:31, 36.56it/s]

Ep 1900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


  8%|▊         | 2005/25000 [00:48<09:46, 39.18it/s]

Ep 2000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 6


  8%|▊         | 2105/25000 [00:50<08:24, 45.39it/s]

Ep 2100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


  9%|▉         | 2205/25000 [00:52<08:28, 44.83it/s]

Ep 2200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  9%|▉         | 2305/25000 [00:54<08:11, 46.19it/s]

Ep 2300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 10%|▉         | 2405/25000 [00:56<08:18, 45.37it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 10%|█         | 2505/25000 [00:59<10:23, 36.08it/s]

Ep 2500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 10%|█         | 2607/25000 [01:02<09:32, 39.08it/s]

Ep 2600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 11%|█         | 2707/25000 [01:04<08:02, 46.16it/s]

Ep 2700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 11%|█         | 2807/25000 [01:06<07:50, 47.13it/s]

Ep 2800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 12%|█▏        | 2907/25000 [01:08<07:56, 46.34it/s]

Ep 2900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


 12%|█▏        | 3007/25000 [01:10<08:06, 45.17it/s]

Ep 3000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 12%|█▏        | 3107/25000 [01:13<09:38, 37.84it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 13%|█▎        | 3207/25000 [01:16<09:03, 40.12it/s]

Ep 3200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 13%|█▎        | 3307/25000 [01:18<07:59, 45.22it/s]

Ep 3300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 14%|█▎        | 3407/25000 [01:20<07:38, 47.12it/s]

Ep 3400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 14%|█▍        | 3507/25000 [01:22<07:46, 46.12it/s]

Ep 3500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 14%|█▍        | 3607/25000 [01:25<07:54, 45.13it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 15%|█▍        | 3707/25000 [01:27<09:26, 37.62it/s]

Ep 3700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 15%|█▌        | 3809/25000 [01:30<08:46, 40.26it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 8


 16%|█▌        | 3909/25000 [01:32<07:26, 47.22it/s]

Ep 3900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 16%|█▌        | 4009/25000 [01:34<07:39, 45.64it/s]

Ep 4000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 16%|█▋        | 4104/25000 [01:36<07:30, 46.43it/s]

Ep 4100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 17%|█▋        | 4209/25000 [01:39<07:40, 45.12it/s]

Ep 4200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 17%|█▋        | 4305/25000 [01:41<09:15, 37.24it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 5


 18%|█▊        | 4407/25000 [01:44<09:57, 34.49it/s]

Ep 4400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 18%|█▊        | 4507/25000 [01:46<07:25, 46.00it/s]

Ep 4500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 7


 18%|█▊        | 4607/25000 [01:49<07:27, 45.58it/s]

Ep 4600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 19%|█▉        | 4707/25000 [01:51<07:29, 45.18it/s]

Ep 4700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 19%|█▉        | 4807/25000 [01:53<07:17, 46.14it/s]

Ep 4800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 20%|█▉        | 4904/25000 [01:55<08:51, 37.82it/s]

Ep 4900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 20%|██        | 5004/25000 [01:58<09:20, 35.67it/s]

Ep 5000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 20%|██        | 5105/25000 [02:00<06:57, 47.60it/s]

Ep 5100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 21%|██        | 5205/25000 [02:03<07:25, 44.42it/s]

Ep 5200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 21%|██        | 5305/25000 [02:05<07:12, 45.57it/s]

Ep 5300/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.7, First Action 7


 22%|██▏       | 5405/25000 [02:07<07:07, 45.84it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 22%|██▏       | 5505/25000 [02:09<08:37, 37.67it/s]

Ep 5500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 22%|██▏       | 5603/25000 [02:12<09:52, 32.72it/s]

Ep 5600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


 23%|██▎       | 5706/25000 [02:15<07:06, 45.24it/s]

Ep 5700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 23%|██▎       | 5806/25000 [02:17<07:00, 45.60it/s]

Ep 5800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 24%|██▎       | 5906/25000 [02:19<06:52, 46.27it/s]

Ep 5900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 24%|██▍       | 6006/25000 [02:21<06:39, 47.56it/s]

Ep 6000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 24%|██▍       | 6105/25000 [02:23<08:45, 35.97it/s]

Ep 6100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 8


 25%|██▍       | 6205/25000 [02:26<09:03, 34.56it/s]

Ep 6200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 25%|██▌       | 6309/25000 [02:29<06:55, 44.98it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 26%|██▌       | 6409/25000 [02:31<06:47, 45.64it/s]

Ep 6400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 26%|██▌       | 6509/25000 [02:33<06:40, 46.21it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 26%|██▋       | 6609/25000 [02:35<06:41, 45.82it/s]

Ep 6600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 27%|██▋       | 6703/25000 [02:38<08:22, 36.45it/s]

Ep 6700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 8


 27%|██▋       | 6804/25000 [02:40<08:54, 34.05it/s]

Ep 6800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


 28%|██▊       | 6907/25000 [02:43<06:37, 45.56it/s]

Ep 6900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 8


 28%|██▊       | 7007/25000 [02:45<06:25, 46.67it/s]

Ep 7000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 28%|██▊       | 7107/25000 [02:47<06:35, 45.24it/s]

Ep 7100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 29%|██▉       | 7207/25000 [02:50<06:44, 44.00it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 29%|██▉       | 7305/25000 [02:52<08:22, 35.22it/s]

Ep 7300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 30%|██▉       | 7405/25000 [02:55<09:01, 32.49it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 30%|███       | 7507/25000 [02:57<06:18, 46.16it/s]

Ep 7500/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 4


 30%|███       | 7607/25000 [02:59<06:15, 46.35it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 8


 31%|███       | 7707/25000 [03:02<06:19, 45.56it/s]

Ep 7700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 31%|███       | 7807/25000 [03:04<06:16, 45.65it/s]

Ep 7800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 32%|███▏      | 7907/25000 [03:06<07:28, 38.12it/s]

Ep 7900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


 32%|███▏      | 8004/25000 [03:09<08:37, 32.85it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 32%|███▏      | 8108/25000 [03:11<06:05, 46.17it/s]

Ep 8100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 33%|███▎      | 8208/25000 [03:14<06:17, 44.50it/s]

Ep 8200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 33%|███▎      | 8308/25000 [03:16<06:06, 45.56it/s]

Ep 8300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 34%|███▎      | 8408/25000 [03:18<06:01, 45.91it/s]

Ep 8400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 3


 34%|███▍      | 8503/25000 [03:20<06:18, 43.54it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


 34%|███▍      | 8606/25000 [03:23<07:37, 35.85it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 35%|███▍      | 8705/25000 [03:25<05:58, 45.42it/s]

Ep 8700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 35%|███▌      | 8805/25000 [03:28<05:54, 45.67it/s]

Ep 8800/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 5


 36%|███▌      | 8905/25000 [03:30<05:53, 45.57it/s]

Ep 8900/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 9


 36%|███▌      | 9005/25000 [03:32<05:48, 45.94it/s]

Ep 9000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 6


 36%|███▋      | 9105/25000 [03:34<05:55, 44.70it/s]

Ep 9100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 37%|███▋      | 9205/25000 [03:37<07:15, 36.25it/s]

Ep 9200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 37%|███▋      | 9305/25000 [03:40<05:54, 44.22it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 38%|███▊      | 9405/25000 [03:42<05:44, 45.22it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 38%|███▊      | 9505/25000 [03:44<05:37, 45.94it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 38%|███▊      | 9605/25000 [03:46<05:30, 46.64it/s]

Ep 9600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 6


 39%|███▉      | 9705/25000 [03:48<05:28, 46.62it/s]

Ep 9700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 39%|███▉      | 9807/25000 [03:51<06:57, 36.40it/s]

Ep 9800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 40%|███▉      | 9909/25000 [03:54<05:31, 45.56it/s]

Ep 9900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 40%|████      | 10009/25000 [03:56<05:25, 46.03it/s]

Ep 10000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 40%|████      | 10109/25000 [03:58<05:27, 45.44it/s]

Ep 10100/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 8


 41%|████      | 10209/25000 [04:00<05:19, 46.33it/s]

Ep 10200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 41%|████      | 10309/25000 [04:02<05:14, 46.70it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 42%|████▏     | 10407/25000 [04:05<06:51, 35.48it/s]

Ep 10400/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 42%|████▏     | 10509/25000 [04:08<05:22, 44.87it/s]

Ep 10500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 42%|████▏     | 10604/25000 [04:10<05:24, 44.34it/s]

Ep 10600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 43%|████▎     | 10705/25000 [04:13<06:52, 34.66it/s]

Ep 10700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 43%|████▎     | 10805/25000 [04:15<05:10, 45.74it/s]

Ep 10800/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 9


 44%|████▎     | 10904/25000 [04:18<06:29, 36.17it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 44%|████▍     | 11004/25000 [04:21<06:59, 33.35it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 44%|████▍     | 11105/25000 [04:23<04:57, 46.74it/s]

Ep 11100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 45%|████▍     | 11205/25000 [04:25<04:58, 46.16it/s]

Ep 11200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 45%|████▌     | 11305/25000 [04:27<04:54, 46.44it/s]

Ep 11300/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 8


 46%|████▌     | 11405/25000 [04:29<04:51, 46.63it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 0


 46%|████▌     | 11506/25000 [04:32<05:55, 37.98it/s]

Ep 11500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 46%|████▋     | 11606/25000 [04:35<06:56, 32.18it/s]

Ep 11600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


 47%|████▋     | 11709/25000 [04:37<04:44, 46.75it/s]

Ep 11700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 47%|████▋     | 11809/25000 [04:39<04:50, 45.36it/s]

Ep 11800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 48%|████▊     | 11904/25000 [04:41<04:47, 45.58it/s]

Ep 11900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 48%|████▊     | 12004/25000 [04:44<05:04, 42.68it/s]

Ep 12000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 48%|████▊     | 12105/25000 [04:46<05:48, 37.03it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 49%|████▉     | 12205/25000 [04:49<06:20, 33.62it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 49%|████▉     | 12309/25000 [04:51<04:39, 45.35it/s]

Ep 12300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


 50%|████▉     | 12409/25000 [04:54<04:37, 45.41it/s]

Ep 12400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 50%|█████     | 12509/25000 [04:56<04:35, 45.31it/s]

Ep 12500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 50%|█████     | 12609/25000 [04:58<04:26, 46.46it/s]

Ep 12600/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 9


 51%|█████     | 12706/25000 [05:00<05:19, 38.48it/s]

Ep 12700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 51%|█████     | 12803/25000 [05:03<06:20, 32.04it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 52%|█████▏    | 12908/25000 [05:06<04:27, 45.13it/s]

Ep 12900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 52%|█████▏    | 13008/25000 [05:08<04:30, 44.27it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 52%|█████▏    | 13108/25000 [05:10<04:20, 45.69it/s]

Ep 13100/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.4, First Action 5


 53%|█████▎    | 13208/25000 [05:12<04:16, 46.00it/s]

Ep 13200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 53%|█████▎    | 13305/25000 [05:15<05:13, 37.28it/s]

Ep 13300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 54%|█████▎    | 13406/25000 [05:17<05:38, 34.29it/s]

Ep 13400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13505/25000 [05:20<04:09, 46.07it/s]

Ep 13500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 54%|█████▍    | 13605/25000 [05:22<04:08, 45.94it/s]

Ep 13600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 55%|█████▍    | 13705/25000 [05:24<04:06, 45.77it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


 55%|█████▌    | 13805/25000 [05:26<04:00, 46.54it/s]

Ep 13800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 56%|█████▌    | 13904/25000 [05:29<05:05, 36.35it/s]

Ep 13900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 56%|█████▌    | 14005/25000 [05:32<05:20, 34.29it/s]

Ep 14000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 56%|█████▋    | 14107/25000 [05:34<03:57, 45.93it/s]

Ep 14100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 6


 57%|█████▋    | 14207/25000 [05:36<03:54, 46.11it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 57%|█████▋    | 14307/25000 [05:38<03:52, 45.95it/s]

Ep 14300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 58%|█████▊    | 14407/25000 [05:41<03:51, 45.68it/s]

Ep 14400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 58%|█████▊    | 14507/25000 [05:43<04:37, 37.80it/s]

Ep 14500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 58%|█████▊    | 14604/25000 [05:46<05:16, 32.80it/s]

Ep 14600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 59%|█████▉    | 14709/25000 [05:48<03:46, 45.38it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 59%|█████▉    | 14809/25000 [05:50<03:44, 45.48it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 60%|█████▉    | 14909/25000 [05:53<03:45, 44.71it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


 60%|██████    | 15009/25000 [05:55<03:37, 45.91it/s]

Ep 15000/25000, Opt. Action: 8, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 0


 60%|██████    | 15104/25000 [05:57<04:04, 40.41it/s]

Ep 15100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 61%|██████    | 15206/25000 [06:00<04:54, 33.30it/s]

Ep 15200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 61%|██████    | 15308/25000 [06:02<03:31, 45.90it/s]

Ep 15300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 9


 62%|██████▏   | 15408/25000 [06:05<03:36, 44.31it/s]

Ep 15400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 62%|██████▏   | 15508/25000 [06:07<03:29, 45.28it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 62%|██████▏   | 15608/25000 [06:09<03:21, 46.69it/s]

Ep 15600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 3


 63%|██████▎   | 15707/25000 [06:11<04:04, 38.03it/s]

Ep 15700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


 63%|██████▎   | 15803/25000 [06:14<04:53, 31.31it/s]

Ep 15800/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 0


 64%|██████▎   | 15908/25000 [06:17<03:22, 44.89it/s]

Ep 15900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 64%|██████▍   | 16008/25000 [06:19<03:17, 45.50it/s]

Ep 16000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 64%|██████▍   | 16108/25000 [06:21<03:14, 45.61it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 65%|██████▍   | 16208/25000 [06:23<03:12, 45.64it/s]

Ep 16200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 65%|██████▌   | 16303/25000 [06:26<03:33, 40.70it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 66%|██████▌   | 16406/25000 [06:28<03:56, 36.39it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 66%|██████▌   | 16508/25000 [06:31<03:03, 46.30it/s]

Ep 16500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 66%|██████▋   | 16608/25000 [06:33<03:04, 45.50it/s]

Ep 16600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 67%|██████▋   | 16708/25000 [06:35<03:01, 45.81it/s]

Ep 16700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 67%|██████▋   | 16808/25000 [06:38<03:05, 44.12it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 68%|██████▊   | 16908/25000 [06:40<03:07, 43.21it/s]

Ep 16900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 68%|██████▊   | 17003/25000 [06:42<03:42, 35.96it/s]

Ep 17000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 68%|██████▊   | 17106/25000 [06:45<02:59, 43.99it/s]

Ep 17100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 69%|██████▉   | 17206/25000 [06:47<02:51, 45.48it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 69%|██████▉   | 17306/25000 [06:50<02:52, 44.73it/s]

Ep 17300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 70%|██████▉   | 17406/25000 [06:52<02:52, 43.90it/s]

Ep 17400/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 4


 70%|███████   | 17505/25000 [06:54<03:20, 37.37it/s]

Ep 17500/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 8


 70%|███████   | 17605/25000 [06:57<03:40, 33.57it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 71%|███████   | 17708/25000 [07:00<02:40, 45.30it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 71%|███████   | 17808/25000 [07:02<02:35, 46.25it/s]

Ep 17800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 72%|███████▏  | 17908/25000 [07:04<02:33, 46.27it/s]

Ep 17900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 72%|███████▏  | 18008/25000 [07:06<02:30, 46.51it/s]

Ep 18000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 72%|███████▏  | 18103/25000 [07:08<02:34, 44.74it/s]

Ep 18100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 8


 73%|███████▎  | 18204/25000 [07:11<03:10, 35.74it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 73%|███████▎  | 18307/25000 [07:14<02:26, 45.54it/s]

Ep 18300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 74%|███████▎  | 18407/25000 [07:16<02:20, 47.04it/s]

Ep 18400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 74%|███████▍  | 18507/25000 [07:18<02:17, 47.19it/s]

Ep 18500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 74%|███████▍  | 18607/25000 [07:21<02:22, 44.97it/s]

Ep 18600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 75%|███████▍  | 18707/25000 [07:23<02:21, 44.45it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 8


 75%|███████▌  | 18806/25000 [07:25<03:04, 33.61it/s]

Ep 18800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 76%|███████▌  | 18909/25000 [07:28<02:12, 46.10it/s]

Ep 18900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 76%|███████▌  | 19009/25000 [07:30<02:10, 45.82it/s]

Ep 19000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 76%|███████▋  | 19104/25000 [07:32<02:10, 45.04it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 77%|███████▋  | 19204/25000 [07:35<02:13, 43.50it/s]

Ep 19200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 8


 77%|███████▋  | 19309/25000 [07:37<02:04, 45.64it/s]

Ep 19300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 78%|███████▊  | 19404/25000 [07:40<02:46, 33.62it/s]

Ep 19400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


 78%|███████▊  | 19508/25000 [07:42<01:58, 46.20it/s]

Ep 19500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 78%|███████▊  | 19608/25000 [07:45<01:58, 45.64it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 79%|███████▉  | 19708/25000 [07:47<01:56, 45.47it/s]

Ep 19700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 79%|███████▉  | 19808/25000 [07:49<01:53, 45.56it/s]

Ep 19800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 80%|███████▉  | 19903/25000 [07:51<01:53, 45.02it/s]

Ep 19900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 80%|████████  | 20006/25000 [07:54<02:25, 34.44it/s]

Ep 20000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 80%|████████  | 20108/25000 [07:57<01:51, 44.05it/s]

Ep 20100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 81%|████████  | 20208/25000 [07:59<01:42, 46.53it/s]

Ep 20200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 81%|████████  | 20308/25000 [08:01<01:41, 46.40it/s]

Ep 20300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 82%|████████▏ | 20408/25000 [08:03<01:38, 46.77it/s]

Ep 20400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 82%|████████▏ | 20508/25000 [08:05<01:34, 47.46it/s]

Ep 20500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 82%|████████▏ | 20606/25000 [08:08<02:01, 36.13it/s]

Ep 20600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 83%|████████▎ | 20705/25000 [08:11<01:34, 45.27it/s]

Ep 20700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 83%|████████▎ | 20805/25000 [08:13<01:31, 45.68it/s]

Ep 20800/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 5


 84%|████████▎ | 20905/25000 [08:15<01:30, 45.19it/s]

Ep 20900/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 6


 84%|████████▍ | 21005/25000 [08:17<01:27, 45.60it/s]

Ep 21000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 8


 84%|████████▍ | 21105/25000 [08:19<01:26, 45.18it/s]

Ep 21100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 85%|████████▍ | 21205/25000 [08:22<01:46, 35.61it/s]

Ep 21200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 85%|████████▌ | 21304/25000 [08:25<01:44, 35.30it/s]

Ep 21300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 86%|████████▌ | 21403/25000 [08:28<01:45, 34.08it/s]

Ep 21400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 86%|████████▌ | 21509/25000 [08:30<01:16, 45.90it/s]

Ep 21500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 86%|████████▋ | 21609/25000 [08:33<01:15, 45.08it/s]

Ep 21600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 87%|████████▋ | 21704/25000 [08:35<01:23, 39.67it/s]

Ep 21700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 87%|████████▋ | 21805/25000 [08:38<01:34, 33.97it/s]

Ep 21800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 88%|████████▊ | 21909/25000 [08:40<01:07, 45.91it/s]

Ep 21900/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 88%|████████▊ | 22009/25000 [08:42<01:05, 45.44it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 0


 88%|████████▊ | 22109/25000 [08:45<01:03, 45.46it/s]

Ep 22100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 89%|████████▉ | 22209/25000 [08:47<01:01, 45.46it/s]

Ep 22200/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 6


 89%|████████▉ | 22304/25000 [08:49<01:00, 44.75it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 3


 90%|████████▉ | 22406/25000 [08:52<01:15, 34.41it/s]

Ep 22400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 90%|█████████ | 22509/25000 [08:55<00:55, 45.22it/s]

Ep 22500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 90%|█████████ | 22609/25000 [08:57<00:52, 45.82it/s]

Ep 22600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 91%|█████████ | 22709/25000 [08:59<00:51, 44.76it/s]

Ep 22700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 91%|█████████ | 22804/25000 [09:01<00:52, 41.45it/s]

Ep 22800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 92%|█████████▏| 22906/25000 [09:04<00:57, 36.54it/s]

Ep 22900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 92%|█████████▏| 23005/25000 [09:07<00:59, 33.66it/s]

Ep 23000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 92%|█████████▏| 23108/25000 [09:09<00:42, 45.04it/s]

Ep 23100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 93%|█████████▎| 23208/25000 [09:12<00:41, 42.72it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 93%|█████████▎| 23308/25000 [09:14<00:38, 44.27it/s]

Ep 23300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 5


 94%|█████████▎| 23408/25000 [09:16<00:35, 44.99it/s]

Ep 23400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


 94%|█████████▍| 23503/25000 [09:18<00:37, 40.37it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


 94%|█████████▍| 23604/25000 [09:21<00:40, 34.21it/s]

Ep 23600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 95%|█████████▍| 23709/25000 [09:24<00:28, 45.70it/s]

Ep 23700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 95%|█████████▌| 23809/25000 [09:26<00:25, 46.49it/s]

Ep 23800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 96%|█████████▌| 23909/25000 [09:28<00:23, 46.16it/s]

Ep 23900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 6


 96%|█████████▌| 24009/25000 [09:31<00:23, 43.04it/s]

Ep 24000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 96%|█████████▋| 24107/25000 [09:33<00:24, 36.83it/s]

Ep 24100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 97%|█████████▋| 24203/25000 [09:36<00:24, 33.02it/s]

Ep 24200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 97%|█████████▋| 24308/25000 [09:38<00:15, 45.33it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 98%|█████████▊| 24408/25000 [09:41<00:13, 43.34it/s]

Ep 24400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 98%|█████████▊| 24508/25000 [09:43<00:11, 43.71it/s]

Ep 24500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 9


 98%|█████████▊| 24608/25000 [09:45<00:08, 44.54it/s]

Ep 24600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 99%|█████████▉| 24704/25000 [09:47<00:08, 35.85it/s]

Ep 24700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 99%|█████████▉| 24804/25000 [09:50<00:05, 32.68it/s]

Ep 24800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


100%|█████████▉| 24908/25000 [09:53<00:02, 43.60it/s]

Ep 24900/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 2


100%|██████████| 25000/25000 [09:55<00:00, 41.98it/s]


Ep 25000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5

TEST:


 39%|███▊      | 116/300 [00:00<00:01, 143.78it/s]

Ep 100/300, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 73%|███████▎  | 220/300 [00:01<00:00, 141.26it/s]

Ep 200/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


100%|██████████| 300/300 [00:02<00:00, 138.36it/s]


Ep 300/300, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8

GAMMA 1 - LR 0.001 - Entropy Decay True


  0%|          | 109/25000 [00:02<09:10, 45.22it/s]

Ep 100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  1%|          | 206/25000 [00:04<11:51, 34.87it/s]

Ep 200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


  1%|          | 306/25000 [00:07<12:16, 33.54it/s]

Ep 300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 0


  2%|▏         | 409/25000 [00:10<09:03, 45.26it/s]

Ep 400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


  2%|▏         | 509/25000 [00:12<08:52, 46.00it/s]

Ep 500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  2%|▏         | 609/25000 [00:14<09:02, 44.92it/s]

Ep 600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


  3%|▎         | 709/25000 [00:16<09:18, 43.45it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


  3%|▎         | 806/25000 [00:19<11:12, 36.00it/s]

Ep 800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


  4%|▎         | 906/25000 [00:22<12:02, 33.33it/s]

Ep 900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


  4%|▍         | 1009/25000 [00:24<08:44, 45.73it/s]

Ep 1000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


  4%|▍         | 1109/25000 [00:26<08:35, 46.32it/s]

Ep 1100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  5%|▍         | 1209/25000 [00:28<08:39, 45.83it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 7


  5%|▌         | 1309/25000 [00:31<08:18, 47.52it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  6%|▌         | 1407/25000 [00:33<10:44, 36.60it/s]

Ep 1400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  6%|▌         | 1505/25000 [00:36<11:59, 32.64it/s]

Ep 1500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


  6%|▋         | 1606/25000 [00:38<08:44, 44.57it/s]

Ep 1600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  7%|▋         | 1706/25000 [00:40<08:27, 45.92it/s]

Ep 1700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


  7%|▋         | 1806/25000 [00:43<08:14, 46.90it/s]

Ep 1800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


  8%|▊         | 1906/25000 [00:45<08:28, 45.40it/s]

Ep 1900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  8%|▊         | 2006/25000 [00:47<10:20, 37.06it/s]

Ep 2000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


  8%|▊         | 2106/25000 [00:50<11:13, 33.98it/s]

Ep 2100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


  9%|▉         | 2205/25000 [00:52<08:21, 45.47it/s]

Ep 2200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


  9%|▉         | 2305/25000 [00:55<08:34, 44.08it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 10%|▉         | 2405/25000 [00:57<08:26, 44.58it/s]

Ep 2400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 10%|█         | 2505/25000 [00:59<08:08, 46.07it/s]

Ep 2500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 10%|█         | 2606/25000 [01:01<10:10, 36.68it/s]

Ep 2600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 11%|█         | 2704/25000 [01:04<11:04, 33.55it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 11%|█         | 2806/25000 [01:07<08:03, 45.89it/s]

Ep 2800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 12%|█▏        | 2906/25000 [01:09<08:13, 44.80it/s]

Ep 2900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 12%|█▏        | 3006/25000 [01:11<08:05, 45.28it/s]

Ep 3000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 7


 12%|█▏        | 3106/25000 [01:13<07:55, 46.04it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 13%|█▎        | 3205/25000 [01:16<09:42, 37.43it/s]

Ep 3200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 13%|█▎        | 3307/25000 [01:18<10:18, 35.08it/s]

Ep 3300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 14%|█▎        | 3405/25000 [01:21<08:05, 44.46it/s]

Ep 3400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 14%|█▍        | 3505/25000 [01:23<07:43, 46.33it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 14%|█▍        | 3605/25000 [01:25<07:38, 46.63it/s]

Ep 3600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 15%|█▍        | 3705/25000 [01:27<07:37, 46.50it/s]

Ep 3700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 15%|█▌        | 3805/25000 [01:30<09:04, 38.93it/s]

Ep 3800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 7


 16%|█▌        | 3905/25000 [01:32<10:19, 34.04it/s]

Ep 3900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 16%|█▌        | 4009/25000 [01:35<07:45, 45.05it/s]

Ep 4000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 16%|█▋        | 4104/25000 [01:37<07:45, 44.90it/s]

Ep 4100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 17%|█▋        | 4209/25000 [01:40<07:31, 46.00it/s]

Ep 4200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 17%|█▋        | 4309/25000 [01:42<07:31, 45.78it/s]

Ep 4300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 7


 18%|█▊        | 4404/25000 [01:44<08:01, 42.76it/s]

Ep 4400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 18%|█▊        | 4506/25000 [01:47<10:22, 32.92it/s]

Ep 4500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 18%|█▊        | 4605/25000 [01:49<07:25, 45.76it/s]

Ep 4600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 19%|█▉        | 4705/25000 [01:51<07:09, 47.30it/s]

Ep 4700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 19%|█▉        | 4805/25000 [01:54<07:38, 44.04it/s]

Ep 4800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 20%|█▉        | 4905/25000 [01:56<07:31, 44.52it/s]

Ep 4900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 20%|██        | 5005/25000 [01:58<07:31, 44.29it/s]

Ep 5000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 20%|██        | 5104/25000 [02:01<09:33, 34.71it/s]

Ep 5100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 21%|██        | 5209/25000 [02:03<07:08, 46.14it/s]

Ep 5200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 21%|██        | 5309/25000 [02:06<07:16, 45.15it/s]

Ep 5300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 22%|██▏       | 5409/25000 [02:08<07:12, 45.32it/s]

Ep 5400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 22%|██▏       | 5509/25000 [02:10<07:03, 46.08it/s]

Ep 5500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 22%|██▏       | 5604/25000 [02:12<08:12, 39.36it/s]

Ep 5600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 23%|██▎       | 5705/25000 [02:15<09:29, 33.91it/s]

Ep 5700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 23%|██▎       | 5807/25000 [02:18<07:08, 44.78it/s]

Ep 5800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 24%|██▎       | 5907/25000 [02:20<07:07, 44.66it/s]

Ep 5900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 24%|██▍       | 6007/25000 [02:22<06:54, 45.84it/s]

Ep 6000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 24%|██▍       | 6107/25000 [02:24<06:49, 46.15it/s]

Ep 6100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 25%|██▍       | 6207/25000 [02:26<06:43, 46.57it/s]

Ep 6200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 25%|██▌       | 6305/25000 [02:29<08:55, 34.89it/s]

Ep 6300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 26%|██▌       | 6405/25000 [02:32<06:56, 44.63it/s]

Ep 6400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 26%|██▌       | 6505/25000 [02:34<06:38, 46.43it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 26%|██▋       | 6605/25000 [02:36<06:25, 47.74it/s]

Ep 6600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 27%|██▋       | 6705/25000 [02:38<06:37, 45.99it/s]

Ep 6700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 27%|██▋       | 6805/25000 [02:41<08:46, 34.55it/s]

Ep 6800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 7


 28%|██▊       | 6905/25000 [02:44<09:15, 32.56it/s]

Ep 6900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 7


 28%|██▊       | 7007/25000 [02:47<07:53, 37.97it/s]

Ep 7000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 28%|██▊       | 7107/25000 [02:49<06:33, 45.46it/s]

Ep 7100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 29%|██▉       | 7207/25000 [02:52<06:43, 44.07it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 29%|██▉       | 7307/25000 [02:54<06:41, 44.07it/s]

Ep 7300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 7


 30%|██▉       | 7407/25000 [02:56<06:22, 46.03it/s]

Ep 7400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 30%|███       | 7506/25000 [02:58<08:02, 36.29it/s]

Ep 7500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 30%|███       | 7608/25000 [03:01<08:05, 35.83it/s]

Ep 7600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 31%|███       | 7707/25000 [03:04<06:20, 45.40it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 31%|███       | 7807/25000 [03:06<06:29, 44.14it/s]

Ep 7800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 32%|███▏      | 7907/25000 [03:08<06:21, 44.83it/s]

Ep 7900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 32%|███▏      | 8007/25000 [03:10<06:15, 45.21it/s]

Ep 8000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 32%|███▏      | 8103/25000 [03:13<07:51, 35.86it/s]

Ep 8100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 7


 33%|███▎      | 8209/25000 [03:16<07:19, 38.18it/s]

Ep 8200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 33%|███▎      | 8309/25000 [03:18<06:11, 44.97it/s]

Ep 8300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 34%|███▎      | 8409/25000 [03:20<06:06, 45.29it/s]

Ep 8400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 34%|███▍      | 8509/25000 [03:22<06:05, 45.10it/s]

Ep 8500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 34%|███▍      | 8604/25000 [03:25<06:11, 44.19it/s]

Ep 8600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 35%|███▍      | 8706/25000 [03:27<07:52, 34.51it/s]

Ep 8700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 35%|███▌      | 8805/25000 [03:30<06:35, 40.93it/s]

Ep 8800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 36%|███▌      | 8905/25000 [03:32<05:47, 46.25it/s]

Ep 8900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 36%|███▌      | 9005/25000 [03:34<05:48, 45.84it/s]

Ep 9000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 36%|███▋      | 9105/25000 [03:37<05:53, 44.94it/s]

Ep 9100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 37%|███▋      | 9205/25000 [03:39<05:44, 45.82it/s]

Ep 9200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 37%|███▋      | 9306/25000 [03:41<07:09, 36.54it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 38%|███▊      | 9405/25000 [03:44<06:22, 40.81it/s]

Ep 9400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 38%|███▊      | 9505/25000 [03:47<05:41, 45.41it/s]

Ep 9500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 38%|███▊      | 9605/25000 [03:49<05:43, 44.87it/s]

Ep 9600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 39%|███▉      | 9705/25000 [03:51<05:28, 46.55it/s]

Ep 9700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 39%|███▉      | 9805/25000 [03:53<05:32, 45.71it/s]

Ep 9800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 40%|███▉      | 9907/25000 [03:56<07:12, 34.89it/s]

Ep 9900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 40%|████      | 10007/25000 [03:59<05:54, 42.34it/s]

Ep 10000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 40%|████      | 10107/25000 [04:01<05:43, 43.34it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 7


 41%|████      | 10207/25000 [04:03<05:28, 45.00it/s]

Ep 10200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 41%|████      | 10307/25000 [04:05<05:20, 45.81it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 42%|████▏     | 10407/25000 [04:08<05:14, 46.47it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 42%|████▏     | 10507/25000 [04:10<06:44, 35.87it/s]

Ep 10500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 42%|████▏     | 10609/25000 [04:13<05:22, 44.65it/s]

Ep 10600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 43%|████▎     | 10709/25000 [04:15<05:06, 46.59it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 43%|████▎     | 10809/25000 [04:17<05:01, 47.01it/s]

Ep 10800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 44%|████▎     | 10904/25000 [04:19<05:08, 45.69it/s]

Ep 10900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 44%|████▍     | 11009/25000 [04:22<05:15, 44.41it/s]

Ep 11000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 44%|████▍     | 11107/25000 [04:24<06:42, 34.51it/s]

Ep 11100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 45%|████▍     | 11209/25000 [04:27<05:07, 44.81it/s]

Ep 11200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 45%|████▌     | 11309/25000 [04:29<05:00, 45.63it/s]

Ep 11300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 46%|████▌     | 11409/25000 [04:32<05:01, 45.01it/s]

Ep 11400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 46%|████▌     | 11509/25000 [04:34<04:59, 45.12it/s]

Ep 11500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 46%|████▋     | 11609/25000 [04:36<04:53, 45.64it/s]

Ep 11600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 47%|████▋     | 11705/25000 [04:39<06:12, 35.66it/s]

Ep 11700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 47%|████▋     | 11806/25000 [04:41<05:04, 43.34it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 48%|████▊     | 11906/25000 [04:44<04:54, 44.53it/s]

Ep 11900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 48%|████▊     | 12006/25000 [04:46<04:49, 44.96it/s]

Ep 12000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 48%|████▊     | 12106/25000 [04:48<04:43, 45.44it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 49%|████▉     | 12206/25000 [04:50<04:35, 46.42it/s]

Ep 12200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 49%|████▉     | 12303/25000 [04:53<06:08, 34.41it/s]

Ep 12300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 50%|████▉     | 12409/25000 [04:56<04:48, 43.68it/s]

Ep 12400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 50%|█████     | 12509/25000 [04:58<04:44, 43.93it/s]

Ep 12500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 50%|█████     | 12609/25000 [05:00<04:37, 44.66it/s]

Ep 12600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 51%|█████     | 12709/25000 [05:03<04:33, 45.01it/s]

Ep 12700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 51%|█████     | 12809/25000 [05:05<04:36, 44.11it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 52%|█████▏    | 12906/25000 [05:08<05:38, 35.69it/s]

Ep 12900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 52%|█████▏    | 13005/25000 [05:10<04:30, 44.42it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 52%|█████▏    | 13105/25000 [05:12<04:24, 44.98it/s]

Ep 13100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 53%|█████▎    | 13205/25000 [05:15<04:26, 44.23it/s]

Ep 13200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 7


 53%|█████▎    | 13305/25000 [05:17<04:19, 45.05it/s]

Ep 13300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 54%|█████▎    | 13405/25000 [05:19<04:12, 45.86it/s]

Ep 13400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 54%|█████▍    | 13505/25000 [05:22<05:17, 36.15it/s]

Ep 13500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 54%|█████▍    | 13608/25000 [05:25<04:23, 43.27it/s]

Ep 13600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 55%|█████▍    | 13708/25000 [05:27<04:07, 45.56it/s]

Ep 13700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 55%|█████▌    | 13808/25000 [05:29<04:01, 46.36it/s]

Ep 13800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 56%|█████▌    | 13908/25000 [05:31<04:09, 44.50it/s]

Ep 13900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 56%|█████▌    | 14008/25000 [05:34<04:04, 44.94it/s]

Ep 14000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 56%|█████▋    | 14106/25000 [05:36<04:42, 38.51it/s]

Ep 14100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 57%|█████▋    | 14205/25000 [05:39<04:20, 41.50it/s]

Ep 14200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 57%|█████▋    | 14305/25000 [05:41<03:50, 46.31it/s]

Ep 14300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 58%|█████▊    | 14405/25000 [05:43<03:48, 46.31it/s]

Ep 14400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 58%|█████▊    | 14505/25000 [05:45<03:40, 47.50it/s]

Ep 14500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 58%|█████▊    | 14605/25000 [05:48<03:52, 44.81it/s]

Ep 14600/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 7


 59%|█████▉    | 14707/25000 [05:50<04:31, 37.93it/s]

Ep 14700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 59%|█████▉    | 14809/25000 [05:53<04:20, 39.09it/s]

Ep 14800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 60%|█████▉    | 14907/25000 [05:55<03:43, 45.16it/s]

Ep 14900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 60%|██████    | 15007/25000 [05:58<03:38, 45.68it/s]

Ep 15000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 60%|██████    | 15107/25000 [06:00<03:42, 44.51it/s]

Ep 15100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 61%|██████    | 15207/25000 [06:02<03:34, 45.60it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 61%|██████    | 15304/25000 [06:04<04:09, 38.89it/s]

Ep 15300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 62%|██████▏   | 15403/25000 [06:07<04:32, 35.17it/s]

Ep 15400/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 7


 62%|██████▏   | 15509/25000 [06:10<03:31, 44.80it/s]

Ep 15500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 7


 62%|██████▏   | 15609/25000 [06:12<03:31, 44.44it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 63%|██████▎   | 15709/25000 [06:14<03:26, 45.05it/s]

Ep 15700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 63%|██████▎   | 15809/25000 [06:16<03:20, 45.78it/s]

Ep 15800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 64%|██████▎   | 15906/25000 [06:19<04:02, 37.53it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 64%|██████▍   | 16004/25000 [06:21<04:35, 32.70it/s]

Ep 16000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 64%|██████▍   | 16106/25000 [06:24<03:20, 44.36it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 65%|██████▍   | 16206/25000 [06:26<03:11, 45.99it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 65%|██████▌   | 16306/25000 [06:28<03:09, 45.88it/s]

Ep 16300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 66%|██████▌   | 16406/25000 [06:30<03:05, 46.22it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 66%|██████▌   | 16505/25000 [06:33<03:52, 36.56it/s]

Ep 16500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 66%|██████▋   | 16606/25000 [06:36<04:07, 33.90it/s]

Ep 16600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 67%|██████▋   | 16709/25000 [06:38<03:09, 43.67it/s]

Ep 16700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 67%|██████▋   | 16809/25000 [06:40<02:59, 45.55it/s]

Ep 16800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 68%|██████▊   | 16909/25000 [06:42<02:57, 45.47it/s]

Ep 16900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 68%|██████▊   | 17009/25000 [06:45<02:56, 45.19it/s]

Ep 17000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 68%|██████▊   | 17107/25000 [06:47<03:35, 36.63it/s]

Ep 17100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 69%|██████▉   | 17204/25000 [06:50<04:02, 32.13it/s]

Ep 17200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 69%|██████▉   | 17305/25000 [06:53<03:33, 36.08it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 70%|██████▉   | 17405/25000 [06:55<02:45, 45.86it/s]

Ep 17400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 70%|███████   | 17505/25000 [06:58<02:46, 45.00it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 70%|███████   | 17605/25000 [07:00<02:41, 45.89it/s]

Ep 17600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 71%|███████   | 17706/25000 [07:02<03:11, 38.14it/s]

Ep 17700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 71%|███████   | 17807/25000 [07:05<03:13, 37.16it/s]

Ep 17800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 72%|███████▏  | 17907/25000 [07:08<02:34, 45.82it/s]

Ep 17900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 72%|███████▏  | 18007/25000 [07:10<02:34, 45.23it/s]

Ep 18000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 72%|███████▏  | 18107/25000 [07:12<02:29, 46.06it/s]

Ep 18100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 73%|███████▎  | 18207/25000 [07:14<02:28, 45.88it/s]

Ep 18200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 73%|███████▎  | 18306/25000 [07:17<03:11, 34.98it/s]

Ep 18300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 74%|███████▎  | 18408/25000 [07:20<02:59, 36.64it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 74%|███████▍  | 18507/25000 [07:22<02:26, 44.28it/s]

Ep 18500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 74%|███████▍  | 18607/25000 [07:24<02:22, 44.95it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 75%|███████▍  | 18707/25000 [07:26<02:19, 45.15it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 75%|███████▌  | 18807/25000 [07:29<02:17, 45.12it/s]

Ep 18800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 76%|███████▌  | 18908/25000 [07:31<02:35, 39.23it/s]

Ep 18900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 76%|███████▌  | 19005/25000 [07:34<03:04, 32.53it/s]

Ep 19000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 76%|███████▋  | 19105/25000 [07:36<02:08, 45.87it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 77%|███████▋  | 19205/25000 [07:38<02:11, 44.05it/s]

Ep 19200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 77%|███████▋  | 19305/25000 [07:41<02:09, 43.99it/s]

Ep 19300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 78%|███████▊  | 19405/25000 [07:43<02:04, 44.88it/s]

Ep 19400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 78%|███████▊  | 19504/25000 [07:45<02:24, 37.98it/s]

Ep 19500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 7


 78%|███████▊  | 19607/25000 [07:48<02:13, 40.51it/s]

Ep 19600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 79%|███████▉  | 19707/25000 [07:51<01:58, 44.76it/s]

Ep 19700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 79%|███████▉  | 19807/25000 [07:53<01:56, 44.46it/s]

Ep 19800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 80%|███████▉  | 19907/25000 [07:55<01:53, 44.93it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 80%|████████  | 20007/25000 [07:57<01:52, 44.34it/s]

Ep 20000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 80%|████████  | 20106/25000 [08:00<02:15, 35.99it/s]

Ep 20100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 81%|████████  | 20209/25000 [08:03<01:56, 40.99it/s]

Ep 20200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 81%|████████  | 20309/25000 [08:05<01:42, 45.73it/s]

Ep 20300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 82%|████████▏ | 20409/25000 [08:07<01:37, 47.18it/s]

Ep 20400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 82%|████████▏ | 20504/25000 [08:10<01:39, 45.08it/s]

Ep 20500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 82%|████████▏ | 20609/25000 [08:12<01:38, 44.48it/s]

Ep 20600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 83%|████████▎ | 20707/25000 [08:14<01:51, 38.54it/s]

Ep 20700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 83%|████████▎ | 20805/25000 [08:17<01:51, 37.75it/s]

Ep 20800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 84%|████████▎ | 20905/25000 [08:19<01:29, 45.86it/s]

Ep 20900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 84%|████████▍ | 21005/25000 [08:22<01:29, 44.39it/s]

Ep 21000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 84%|████████▍ | 21105/25000 [08:24<01:27, 44.68it/s]

Ep 21100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 85%|████████▍ | 21205/25000 [08:26<01:23, 45.60it/s]

Ep 21200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 85%|████████▌ | 21307/25000 [08:29<01:43, 35.82it/s]

Ep 21300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 86%|████████▌ | 21408/25000 [08:32<01:36, 37.11it/s]

Ep 21400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 86%|████████▌ | 21508/25000 [08:34<01:18, 44.49it/s]

Ep 21500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 86%|████████▋ | 21608/25000 [08:36<01:14, 45.49it/s]

Ep 21600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 87%|████████▋ | 21708/25000 [08:38<01:12, 45.66it/s]

Ep 21700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 87%|████████▋ | 21808/25000 [08:41<01:10, 45.18it/s]

Ep 21800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 88%|████████▊ | 21905/25000 [08:43<01:27, 35.28it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 88%|████████▊ | 22009/25000 [08:46<01:10, 42.22it/s]

Ep 22000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 88%|████████▊ | 22109/25000 [08:48<01:04, 45.12it/s]

Ep 22100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 89%|████████▉ | 22209/25000 [08:51<01:01, 45.54it/s]

Ep 22200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 89%|████████▉ | 22309/25000 [08:53<00:59, 44.89it/s]

Ep 22300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 90%|████████▉ | 22409/25000 [08:55<00:57, 45.05it/s]

Ep 22400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 90%|█████████ | 22504/25000 [08:57<01:03, 39.26it/s]

Ep 22500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 90%|█████████ | 22604/25000 [09:00<01:09, 34.46it/s]

Ep 22600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 91%|█████████ | 22709/25000 [09:03<00:51, 44.81it/s]

Ep 22700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 91%|█████████ | 22809/25000 [09:05<00:47, 45.75it/s]

Ep 22800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 92%|█████████▏| 22909/25000 [09:07<00:46, 45.06it/s]

Ep 22900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 92%|█████████▏| 23009/25000 [09:09<00:43, 45.64it/s]

Ep 23000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 92%|█████████▏| 23106/25000 [09:12<00:52, 36.22it/s]

Ep 23100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 93%|█████████▎| 23203/25000 [09:15<00:52, 34.24it/s]

Ep 23200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 93%|█████████▎| 23306/25000 [09:17<00:37, 44.70it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 94%|█████████▎| 23406/25000 [09:19<00:34, 45.97it/s]

Ep 23400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 94%|█████████▍| 23506/25000 [09:21<00:32, 46.48it/s]

Ep 23500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 94%|█████████▍| 23606/25000 [09:24<00:30, 46.17it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 95%|█████████▍| 23707/25000 [09:26<00:35, 36.74it/s]

Ep 23700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 95%|█████████▌| 23806/25000 [09:29<00:37, 32.02it/s]

Ep 23800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 96%|█████████▌| 23909/25000 [09:31<00:23, 45.92it/s]

Ep 23900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 96%|█████████▌| 24009/25000 [09:33<00:21, 47.04it/s]

Ep 24000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 96%|█████████▋| 24109/25000 [09:36<00:20, 44.35it/s]

Ep 24100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 97%|█████████▋| 24209/25000 [09:38<00:17, 45.15it/s]

Ep 24200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 7


 97%|█████████▋| 24303/25000 [09:40<00:20, 33.94it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 98%|█████████▊| 24403/25000 [09:43<00:18, 32.96it/s]

Ep 24400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 98%|█████████▊| 24505/25000 [09:46<00:10, 46.29it/s]

Ep 24500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 98%|█████████▊| 24605/25000 [09:48<00:08, 44.28it/s]

Ep 24600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 99%|█████████▉| 24705/25000 [09:50<00:06, 44.59it/s]

Ep 24700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 99%|█████████▉| 24805/25000 [09:52<00:04, 46.49it/s]

Ep 24800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


100%|█████████▉| 24907/25000 [09:55<00:02, 34.11it/s]

Ep 24900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


100%|██████████| 25000/25000 [09:57<00:00, 41.81it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7

TEST:


 39%|███▉      | 117/300 [00:00<00:01, 143.37it/s]

Ep 100/300, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 74%|███████▎  | 221/300 [00:01<00:00, 140.72it/s]

Ep 200/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


100%|██████████| 300/300 [00:02<00:00, 140.97it/s]


Ep 300/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7

GAMMA 1 - LR 0.001 - Entropy Decay False


  0%|          | 105/25000 [00:02<09:13, 44.99it/s]

Ep 100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


  1%|          | 205/25000 [00:04<09:20, 44.27it/s]

Ep 200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  1%|          | 307/25000 [00:07<11:04, 37.17it/s]

Ep 300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 2


  2%|▏         | 405/25000 [00:10<09:19, 43.99it/s]

Ep 400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


  2%|▏         | 505/25000 [00:12<08:57, 45.57it/s]

Ep 500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.7, First Action 2


  2%|▏         | 605/25000 [00:14<09:07, 44.59it/s]

Ep 600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


  3%|▎         | 705/25000 [00:16<09:07, 44.40it/s]

Ep 700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  3%|▎         | 805/25000 [00:18<09:14, 43.61it/s]

Ep 800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  4%|▎         | 905/25000 [00:21<11:05, 36.19it/s]

Ep 900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  4%|▍         | 1007/25000 [00:24<09:09, 43.64it/s]

Ep 1000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  4%|▍         | 1107/25000 [00:26<08:53, 44.82it/s]

Ep 1100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  5%|▍         | 1207/25000 [00:28<08:44, 45.38it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  5%|▌         | 1307/25000 [00:31<08:36, 45.91it/s]

Ep 1300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  6%|▌         | 1407/25000 [00:33<08:26, 46.62it/s]

Ep 1400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


  6%|▌         | 1507/25000 [00:35<10:48, 36.21it/s]

Ep 1500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  6%|▋         | 1605/25000 [00:38<09:26, 41.28it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


  7%|▋         | 1705/25000 [00:40<08:41, 44.65it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


  7%|▋         | 1805/25000 [00:43<08:29, 45.54it/s]

Ep 1800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  8%|▊         | 1905/25000 [00:45<08:23, 45.91it/s]

Ep 1900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  8%|▊         | 2005/25000 [00:47<08:21, 45.87it/s]

Ep 2000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  8%|▊         | 2106/25000 [00:50<10:46, 35.40it/s]

Ep 2100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  9%|▉         | 2208/25000 [00:53<09:33, 39.75it/s]

Ep 2200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  9%|▉         | 2308/25000 [00:55<08:29, 44.52it/s]

Ep 2300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 10%|▉         | 2405/25000 [00:57<10:16, 36.66it/s]

Ep 2400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 10%|█         | 2509/25000 [01:00<09:01, 41.51it/s]

Ep 2500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 10%|█         | 2604/25000 [01:02<08:40, 43.02it/s]

Ep 2600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 11%|█         | 2705/25000 [01:05<10:27, 35.51it/s]

Ep 2700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 11%|█         | 2808/25000 [01:08<08:04, 45.81it/s]

Ep 2800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 12%|█▏        | 2908/25000 [01:10<08:16, 44.47it/s]

Ep 2900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 12%|█▏        | 3008/25000 [01:12<08:00, 45.72it/s]

Ep 3000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 12%|█▏        | 3108/25000 [01:15<07:56, 45.90it/s]

Ep 3100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 13%|█▎        | 3208/25000 [01:17<08:13, 44.18it/s]

Ep 3200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 13%|█▎        | 3306/25000 [01:19<09:46, 37.00it/s]

Ep 3300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 14%|█▎        | 3405/25000 [01:22<08:22, 42.99it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 2


 14%|█▍        | 3505/25000 [01:24<07:59, 44.81it/s]

Ep 3500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 14%|█▍        | 3605/25000 [01:27<07:54, 45.05it/s]

Ep 3600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 15%|█▍        | 3705/25000 [01:29<07:38, 46.49it/s]

Ep 3700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 15%|█▌        | 3805/25000 [01:31<07:39, 46.08it/s]

Ep 3800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 16%|█▌        | 3907/25000 [01:34<09:21, 37.58it/s]

Ep 3900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 16%|█▌        | 4008/25000 [01:36<08:01, 43.56it/s]

Ep 4000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 2


 16%|█▋        | 4108/25000 [01:39<07:41, 45.30it/s]

Ep 4100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 17%|█▋        | 4208/25000 [01:41<07:37, 45.44it/s]

Ep 4200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 17%|█▋        | 4308/25000 [01:43<07:37, 45.18it/s]

Ep 4300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 18%|█▊        | 4408/25000 [01:45<07:35, 45.25it/s]

Ep 4400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 18%|█▊        | 4507/25000 [01:48<09:33, 35.75it/s]

Ep 4500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 2


 18%|█▊        | 4609/25000 [01:51<07:25, 45.78it/s]

Ep 4600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 19%|█▉        | 4709/25000 [01:53<07:27, 45.37it/s]

Ep 4700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 19%|█▉        | 4809/25000 [01:55<07:32, 44.66it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 20%|█▉        | 4909/25000 [01:57<07:22, 45.44it/s]

Ep 4900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 20%|██        | 5009/25000 [02:00<07:20, 45.43it/s]

Ep 5000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 20%|██        | 5104/25000 [02:02<09:28, 34.98it/s]

Ep 5100/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 2


 21%|██        | 5207/25000 [02:05<07:30, 43.94it/s]

Ep 5200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 21%|██        | 5307/25000 [02:07<07:20, 44.69it/s]

Ep 5300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 22%|██▏       | 5407/25000 [02:09<07:08, 45.68it/s]

Ep 5400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 22%|██▏       | 5507/25000 [02:12<07:12, 45.09it/s]

Ep 5500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 22%|██▏       | 5607/25000 [02:14<07:04, 45.73it/s]

Ep 5600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 23%|██▎       | 5707/25000 [02:17<08:34, 37.52it/s]

Ep 5700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 23%|██▎       | 5809/25000 [02:19<07:22, 43.33it/s]

Ep 5800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 24%|██▎       | 5909/25000 [02:22<06:45, 47.05it/s]

Ep 5900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 24%|██▍       | 6009/25000 [02:24<06:52, 46.05it/s]

Ep 6000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 24%|██▍       | 6104/25000 [02:26<07:13, 43.58it/s]

Ep 6100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 25%|██▍       | 6209/25000 [02:28<06:51, 45.61it/s]

Ep 6200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 25%|██▌       | 6304/25000 [02:31<08:42, 35.77it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 26%|██▌       | 6406/25000 [02:34<06:55, 44.77it/s]

Ep 6400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 26%|██▌       | 6506/25000 [02:36<06:56, 44.40it/s]

Ep 6500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 26%|██▋       | 6606/25000 [02:38<06:52, 44.61it/s]

Ep 6600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 27%|██▋       | 6706/25000 [02:40<06:48, 44.83it/s]

Ep 6700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 27%|██▋       | 6806/25000 [02:43<06:39, 45.58it/s]

Ep 6800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 28%|██▊       | 6905/25000 [02:45<08:14, 36.62it/s]

Ep 6900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 28%|██▊       | 7005/25000 [02:48<07:15, 41.30it/s]

Ep 7000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 28%|██▊       | 7105/25000 [02:50<06:47, 43.88it/s]

Ep 7100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 29%|██▉       | 7205/25000 [02:52<06:36, 44.85it/s]

Ep 7200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 29%|██▉       | 7305/25000 [02:55<06:35, 44.71it/s]

Ep 7300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 30%|██▉       | 7405/25000 [02:57<06:27, 45.45it/s]

Ep 7400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 30%|███       | 7506/25000 [03:00<07:50, 37.20it/s]

Ep 7500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 30%|███       | 7609/25000 [03:02<07:05, 40.89it/s]

Ep 7600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 31%|███       | 7709/25000 [03:05<06:22, 45.25it/s]

Ep 7700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 31%|███       | 7809/25000 [03:07<06:08, 46.59it/s]

Ep 7800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 32%|███▏      | 7904/25000 [03:09<06:36, 43.08it/s]

Ep 7900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 32%|███▏      | 8009/25000 [03:11<06:20, 44.71it/s]

Ep 8000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 32%|███▏      | 8106/25000 [03:14<07:52, 35.73it/s]

Ep 8100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 33%|███▎      | 8207/25000 [03:17<08:07, 34.44it/s]

Ep 8200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 33%|███▎      | 8307/25000 [03:19<06:15, 44.49it/s]

Ep 8300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 34%|███▎      | 8407/25000 [03:21<06:10, 44.80it/s]

Ep 8400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 34%|███▍      | 8507/25000 [03:24<05:53, 46.64it/s]

Ep 8500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 2


 34%|███▍      | 8607/25000 [03:26<06:01, 45.32it/s]

Ep 8600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 35%|███▍      | 8707/25000 [03:28<07:28, 36.33it/s]

Ep 8700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 35%|███▌      | 8803/25000 [03:31<07:48, 34.59it/s]

Ep 8800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 36%|███▌      | 8906/25000 [03:33<05:59, 44.75it/s]

Ep 8900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 36%|███▌      | 9006/25000 [03:36<06:00, 44.38it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 36%|███▋      | 9106/25000 [03:38<05:40, 46.67it/s]

Ep 9100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 37%|███▋      | 9206/25000 [03:40<05:43, 46.01it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 37%|███▋      | 9304/25000 [03:42<07:08, 36.60it/s]

Ep 9300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 38%|███▊      | 9404/25000 [03:45<07:27, 34.85it/s]

Ep 9400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 38%|███▊      | 9509/25000 [03:48<05:38, 45.78it/s]

Ep 9500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 38%|███▊      | 9609/25000 [03:50<05:36, 45.77it/s]

Ep 9600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 39%|███▉      | 9709/25000 [03:52<05:37, 45.29it/s]

Ep 9700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 39%|███▉      | 9809/25000 [03:54<05:34, 45.44it/s]

Ep 9800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 40%|███▉      | 9904/25000 [03:57<06:41, 37.56it/s]

Ep 9900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 40%|████      | 10005/25000 [04:00<07:28, 33.41it/s]

Ep 10000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 40%|████      | 10105/25000 [04:02<05:28, 45.35it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 41%|████      | 10205/25000 [04:04<05:33, 44.35it/s]

Ep 10200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 41%|████      | 10305/25000 [04:06<05:31, 44.33it/s]

Ep 10300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 42%|████▏     | 10405/25000 [04:09<05:32, 43.91it/s]

Ep 10400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 42%|████▏     | 10505/25000 [04:11<06:35, 36.69it/s]

Ep 10500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 42%|████▏     | 10605/25000 [04:14<07:21, 32.59it/s]

Ep 10600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 43%|████▎     | 10708/25000 [04:16<05:15, 45.32it/s]

Ep 10700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 43%|████▎     | 10808/25000 [04:19<05:10, 45.69it/s]

Ep 10800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 44%|████▎     | 10908/25000 [04:21<04:58, 47.24it/s]

Ep 10900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 44%|████▍     | 11008/25000 [04:23<05:12, 44.84it/s]

Ep 11000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 44%|████▍     | 11105/25000 [04:25<06:30, 35.58it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 45%|████▍     | 11205/25000 [04:28<06:45, 34.01it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 45%|████▌     | 11306/25000 [04:31<04:57, 46.05it/s]

Ep 11300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 46%|████▌     | 11406/25000 [04:33<04:59, 45.40it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 46%|████▌     | 11506/25000 [04:35<05:12, 43.14it/s]

Ep 11500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 46%|████▋     | 11606/25000 [04:37<04:56, 45.15it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 47%|████▋     | 11706/25000 [04:40<05:41, 38.89it/s]

Ep 11700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 47%|████▋     | 11806/25000 [04:43<06:15, 35.12it/s]

Ep 11800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 48%|████▊     | 11906/25000 [04:45<04:50, 45.09it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 48%|████▊     | 12006/25000 [04:47<04:52, 44.45it/s]

Ep 12000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 48%|████▊     | 12106/25000 [04:50<04:41, 45.82it/s]

Ep 12100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 49%|████▉     | 12206/25000 [04:52<04:44, 44.91it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 49%|████▉     | 12306/25000 [04:54<05:21, 39.50it/s]

Ep 12300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 50%|████▉     | 12406/25000 [04:57<05:52, 35.76it/s]

Ep 12400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 50%|█████     | 12507/25000 [04:59<04:33, 45.60it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 50%|█████     | 12607/25000 [05:02<04:29, 45.99it/s]

Ep 12600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 51%|█████     | 12707/25000 [05:04<04:39, 43.98it/s]

Ep 12700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 51%|█████     | 12808/25000 [05:07<05:24, 37.59it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 52%|█████▏    | 12904/25000 [05:09<05:47, 34.85it/s]

Ep 12900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 52%|█████▏    | 13006/25000 [05:12<06:05, 32.80it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 52%|█████▏    | 13109/25000 [05:15<04:29, 44.16it/s]

Ep 13100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 53%|█████▎    | 13204/25000 [05:17<04:23, 44.78it/s]

Ep 13200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 53%|█████▎    | 13309/25000 [05:19<04:20, 44.89it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 54%|█████▎    | 13409/25000 [05:22<04:20, 44.49it/s]

Ep 13400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 54%|█████▍    | 13504/25000 [05:24<05:32, 34.56it/s]

Ep 13500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13605/25000 [05:27<05:55, 32.06it/s]

Ep 13600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 2


 55%|█████▍    | 13708/25000 [05:29<04:28, 42.08it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 55%|█████▌    | 13808/25000 [05:32<04:06, 45.45it/s]

Ep 13800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 56%|█████▌    | 13908/25000 [05:34<04:06, 44.91it/s]

Ep 13900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 56%|█████▌    | 14008/25000 [05:36<04:19, 42.41it/s]

Ep 14000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 56%|█████▋    | 14107/25000 [05:39<04:46, 37.97it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 57%|█████▋    | 14203/25000 [05:41<05:27, 32.94it/s]

Ep 14200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 57%|█████▋    | 14306/25000 [05:44<04:02, 44.06it/s]

Ep 14300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 58%|█████▊    | 14406/25000 [05:46<03:55, 44.93it/s]

Ep 14400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 58%|█████▊    | 14506/25000 [05:48<03:55, 44.56it/s]

Ep 14500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 58%|█████▊    | 14606/25000 [05:51<03:52, 44.70it/s]

Ep 14600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 59%|█████▉    | 14706/25000 [05:53<04:46, 35.88it/s]

Ep 14700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 59%|█████▉    | 14806/25000 [05:56<05:08, 33.06it/s]

Ep 14800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 60%|█████▉    | 14908/25000 [05:58<03:48, 44.24it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 60%|██████    | 15008/25000 [06:01<03:41, 45.06it/s]

Ep 15000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 60%|██████    | 15108/25000 [06:03<03:37, 45.49it/s]

Ep 15100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 61%|██████    | 15208/25000 [06:05<03:35, 45.44it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 61%|██████    | 15306/25000 [06:07<04:34, 35.31it/s]

Ep 15300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 62%|██████▏   | 15404/25000 [06:10<04:56, 32.41it/s]

Ep 15400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 62%|██████▏   | 15509/25000 [06:13<03:34, 44.25it/s]

Ep 15500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 62%|██████▏   | 15609/25000 [06:15<03:26, 45.40it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 63%|██████▎   | 15709/25000 [06:17<03:30, 44.20it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 63%|██████▎   | 15809/25000 [06:19<03:20, 45.77it/s]

Ep 15800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 64%|██████▎   | 15903/25000 [06:22<04:02, 37.55it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 64%|██████▍   | 16003/25000 [06:24<04:14, 35.36it/s]

Ep 16000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 64%|██████▍   | 16109/25000 [06:27<03:21, 44.10it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 65%|██████▍   | 16209/25000 [06:29<03:18, 44.28it/s]

Ep 16200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 65%|██████▌   | 16309/25000 [06:32<03:14, 44.68it/s]

Ep 16300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 66%|██████▌   | 16409/25000 [06:34<03:09, 45.44it/s]

Ep 16400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 66%|██████▌   | 16506/25000 [06:36<03:52, 36.55it/s]

Ep 16500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 66%|██████▋   | 16606/25000 [06:39<04:13, 33.17it/s]

Ep 16600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 67%|██████▋   | 16705/25000 [06:41<03:03, 45.13it/s]

Ep 16700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 67%|██████▋   | 16805/25000 [06:44<02:58, 45.80it/s]

Ep 16800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 68%|██████▊   | 16905/25000 [06:46<02:54, 46.45it/s]

Ep 16900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 68%|██████▊   | 17005/25000 [06:48<03:01, 44.12it/s]

Ep 17000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 68%|██████▊   | 17105/25000 [06:50<03:17, 39.90it/s]

Ep 17100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 69%|██████▉   | 17206/25000 [06:53<04:01, 32.33it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 69%|██████▉   | 17308/25000 [06:56<02:49, 45.33it/s]

Ep 17300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 70%|██████▉   | 17408/25000 [06:58<02:49, 44.86it/s]

Ep 17400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 70%|███████   | 17508/25000 [07:00<02:42, 46.10it/s]

Ep 17500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 70%|███████   | 17608/25000 [07:03<02:45, 44.67it/s]

Ep 17600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 71%|███████   | 17708/25000 [07:05<02:43, 44.58it/s]

Ep 17700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 71%|███████   | 17807/25000 [07:08<03:28, 34.58it/s]

Ep 17800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 72%|███████▏  | 17907/25000 [07:10<02:40, 44.12it/s]

Ep 17900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 72%|███████▏  | 18007/25000 [07:13<02:32, 45.77it/s]

Ep 18000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 72%|███████▏  | 18107/25000 [07:15<02:32, 45.24it/s]

Ep 18100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 73%|███████▎  | 18207/25000 [07:17<02:25, 46.65it/s]

Ep 18200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 73%|███████▎  | 18307/25000 [07:19<02:31, 44.21it/s]

Ep 18300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 74%|███████▎  | 18406/25000 [07:22<02:51, 38.40it/s]

Ep 18400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 74%|███████▍  | 18505/25000 [07:25<02:29, 43.35it/s]

Ep 18500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 74%|███████▍  | 18605/25000 [07:27<02:23, 44.53it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 75%|███████▍  | 18705/25000 [07:29<02:19, 45.03it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 75%|███████▌  | 18805/25000 [07:31<02:18, 44.85it/s]

Ep 18800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 76%|███████▌  | 18905/25000 [07:33<02:12, 45.86it/s]

Ep 18900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 76%|███████▌  | 19006/25000 [07:36<02:49, 35.36it/s]

Ep 19000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 76%|███████▋  | 19106/25000 [07:39<02:23, 40.95it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 2


 77%|███████▋  | 19206/25000 [07:41<02:07, 45.29it/s]

Ep 19200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 77%|███████▋  | 19306/25000 [07:43<02:06, 44.84it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 78%|███████▊  | 19406/25000 [07:46<02:05, 44.71it/s]

Ep 19400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 78%|███████▊  | 19506/25000 [07:48<02:02, 44.93it/s]

Ep 19500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 78%|███████▊  | 19606/25000 [07:51<02:38, 34.05it/s]

Ep 19600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 79%|███████▉  | 19708/25000 [07:53<02:13, 39.62it/s]

Ep 19700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


 79%|███████▉  | 19808/25000 [07:56<01:54, 45.46it/s]

Ep 19800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 80%|███████▉  | 19908/25000 [07:58<01:59, 42.63it/s]

Ep 19900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 80%|████████  | 20008/25000 [08:00<01:54, 43.76it/s]

Ep 20000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 80%|████████  | 20108/25000 [08:02<01:46, 45.90it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 81%|████████  | 20205/25000 [08:05<02:13, 35.82it/s]

Ep 20200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 81%|████████  | 20308/25000 [08:08<01:54, 40.93it/s]

Ep 20300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 82%|████████▏ | 20408/25000 [08:10<01:42, 44.62it/s]

Ep 20400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 82%|████████▏ | 20508/25000 [08:12<01:36, 46.43it/s]

Ep 20500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 82%|████████▏ | 20608/25000 [08:15<01:37, 45.04it/s]

Ep 20600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 83%|████████▎ | 20708/25000 [08:17<01:34, 45.47it/s]

Ep 20700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 83%|████████▎ | 20805/25000 [08:19<01:52, 37.38it/s]

Ep 20800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 84%|████████▎ | 20905/25000 [08:22<02:03, 33.27it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 84%|████████▍ | 21008/25000 [08:25<01:28, 45.01it/s]

Ep 21000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 84%|████████▍ | 21108/25000 [08:27<01:25, 45.55it/s]

Ep 21100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 85%|████████▍ | 21208/25000 [08:29<01:22, 45.91it/s]

Ep 21200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 85%|████████▌ | 21308/25000 [08:31<01:22, 44.57it/s]

Ep 21300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 86%|████████▌ | 21404/25000 [08:34<01:40, 35.73it/s]

Ep 21400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 2


 86%|████████▌ | 21505/25000 [08:37<01:45, 33.04it/s]

Ep 21500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 86%|████████▋ | 21609/25000 [08:39<01:14, 45.39it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 87%|████████▋ | 21709/25000 [08:41<01:13, 44.79it/s]

Ep 21700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 87%|████████▋ | 21809/25000 [08:43<01:10, 45.22it/s]

Ep 21800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 88%|████████▊ | 21909/25000 [08:46<01:07, 45.64it/s]

Ep 21900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 88%|████████▊ | 22004/25000 [08:48<01:25, 34.94it/s]

Ep 22000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 88%|████████▊ | 22104/25000 [08:51<01:23, 34.74it/s]

Ep 22100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 89%|████████▉ | 22207/25000 [08:53<01:02, 44.59it/s]

Ep 22200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 89%|████████▉ | 22307/25000 [08:56<01:00, 44.79it/s]

Ep 22300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 90%|████████▉ | 22407/25000 [08:58<00:58, 44.57it/s]

Ep 22400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 90%|█████████ | 22507/25000 [09:00<00:55, 44.54it/s]

Ep 22500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 90%|█████████ | 22607/25000 [09:03<01:06, 35.98it/s]

Ep 22600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 91%|█████████ | 22703/25000 [09:06<01:08, 33.39it/s]

Ep 22700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 91%|█████████ | 22806/25000 [09:08<00:49, 44.46it/s]

Ep 22800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 92%|█████████▏| 22904/25000 [09:10<00:57, 36.24it/s]

Ep 22900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 92%|█████████▏| 23006/25000 [09:13<00:57, 34.66it/s]

Ep 23000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 92%|█████████▏| 23105/25000 [09:16<00:41, 45.34it/s]

Ep 23100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 93%|█████████▎| 23206/25000 [09:18<00:49, 36.42it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 93%|█████████▎| 23308/25000 [09:21<00:38, 43.60it/s]

Ep 23300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 94%|█████████▎| 23408/25000 [09:23<00:35, 45.30it/s]

Ep 23400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 94%|█████████▍| 23508/25000 [09:26<00:33, 44.21it/s]

Ep 23500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 94%|█████████▍| 23608/25000 [09:28<00:31, 44.57it/s]

Ep 23600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 95%|█████████▍| 23708/25000 [09:30<00:29, 44.52it/s]

Ep 23700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 95%|█████████▌| 23807/25000 [09:33<00:31, 37.29it/s]

Ep 23800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 96%|█████████▌| 23908/25000 [09:36<00:25, 43.38it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 96%|█████████▌| 24008/25000 [09:38<00:21, 45.90it/s]

Ep 24000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 96%|█████████▋| 24108/25000 [09:40<00:19, 44.73it/s]

Ep 24100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 97%|█████████▋| 24208/25000 [09:42<00:18, 43.26it/s]

Ep 24200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 97%|█████████▋| 24308/25000 [09:45<00:15, 45.47it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 98%|█████████▊| 24405/25000 [09:47<00:17, 33.96it/s]

Ep 24400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 98%|█████████▊| 24509/25000 [09:50<00:11, 44.23it/s]

Ep 24500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 98%|█████████▊| 24609/25000 [09:52<00:08, 44.55it/s]

Ep 24600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 99%|█████████▉| 24709/25000 [09:54<00:06, 45.67it/s]

Ep 24700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 99%|█████████▉| 24809/25000 [09:57<00:04, 45.78it/s]

Ep 24800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


100%|█████████▉| 24909/25000 [09:59<00:02, 43.51it/s]

Ep 24900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


100%|██████████| 25000/25000 [10:02<00:00, 41.53it/s]


Ep 25000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2

TEST:


 41%|████      | 122/300 [00:01<00:01, 107.38it/s]

Ep 100/300, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 72%|███████▏  | 217/300 [00:02<00:00, 120.89it/s]

Ep 200/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.6, First Action 2


100%|██████████| 300/300 [00:02<00:00, 112.46it/s]


Ep 300/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2

GAMMA 0.9 - LR 0.001 - Entropy Decay False


  0%|          | 108/25000 [00:02<09:09, 45.29it/s]

Ep 100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


  1%|          | 208/25000 [00:04<09:19, 44.33it/s]

Ep 200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


  1%|          | 308/25000 [00:06<09:15, 44.45it/s]

Ep 300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


  2%|▏         | 408/25000 [00:09<09:06, 45.01it/s]

Ep 400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


  2%|▏         | 504/25000 [00:11<11:00, 37.06it/s]

Ep 500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


  2%|▏         | 607/25000 [00:14<09:28, 42.88it/s]

Ep 600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


  3%|▎         | 707/25000 [00:16<09:00, 44.90it/s]

Ep 700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 9


  3%|▎         | 807/25000 [00:19<08:52, 45.42it/s]

Ep 800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  4%|▎         | 907/25000 [00:21<08:59, 44.67it/s]

Ep 900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


  4%|▍         | 1007/25000 [00:23<09:11, 43.54it/s]

Ep 1000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


  4%|▍         | 1104/25000 [00:26<10:27, 38.05it/s]

Ep 1100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  5%|▍         | 1205/25000 [00:29<09:02, 43.87it/s]

Ep 1200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 9


  5%|▌         | 1305/25000 [00:31<08:48, 44.81it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 9


  6%|▌         | 1405/25000 [00:33<08:45, 44.94it/s]

Ep 1400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


  6%|▌         | 1505/25000 [00:35<08:39, 45.20it/s]

Ep 1500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


  6%|▋         | 1605/25000 [00:38<08:48, 44.28it/s]

Ep 1600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


  7%|▋         | 1705/25000 [00:40<10:30, 36.98it/s]

Ep 1700/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 9


  7%|▋         | 1806/25000 [00:43<09:23, 41.18it/s]

Ep 1800/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.5, First Action 9


  8%|▊         | 1906/25000 [00:45<08:34, 44.86it/s]

Ep 1900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  8%|▊         | 2006/25000 [00:48<08:18, 46.17it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  8%|▊         | 2106/25000 [00:50<08:23, 45.48it/s]

Ep 2100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


  9%|▉         | 2206/25000 [00:52<08:18, 45.76it/s]

Ep 2200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  9%|▉         | 2304/25000 [00:55<10:23, 36.39it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 9


 10%|▉         | 2406/25000 [00:58<08:39, 43.50it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 10%|█         | 2506/25000 [01:00<08:20, 44.94it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 10%|█         | 2606/25000 [01:02<08:31, 43.79it/s]

Ep 2600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 11%|█         | 2706/25000 [01:04<08:18, 44.76it/s]

Ep 2700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 11%|█         | 2806/25000 [01:06<08:16, 44.72it/s]

Ep 2800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 12%|█▏        | 2904/25000 [01:09<10:01, 36.75it/s]

Ep 2900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.6, First Action 2


 12%|█▏        | 3005/25000 [01:12<08:19, 44.04it/s]

Ep 3000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 12%|█▏        | 3105/25000 [01:14<08:19, 43.87it/s]

Ep 3100/25000, Opt. Action: 7, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.4, First Action 10


 13%|█▎        | 3205/25000 [01:16<08:10, 44.43it/s]

Ep 3200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 13%|█▎        | 3305/25000 [01:19<08:04, 44.78it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 14%|█▎        | 3405/25000 [01:21<07:50, 45.92it/s]

Ep 3400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 2


 14%|█▍        | 3506/25000 [01:24<09:58, 35.93it/s]

Ep 3500/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 5


 14%|█▍        | 3609/25000 [01:26<08:41, 41.01it/s]

Ep 3600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 15%|█▍        | 3709/25000 [01:29<07:42, 46.01it/s]

Ep 3700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 0


 15%|█▌        | 3804/25000 [01:31<07:57, 44.35it/s]

Ep 3800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 9


 16%|█▌        | 3903/25000 [01:33<11:03, 31.81it/s]

Ep 3900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 16%|█▌        | 4007/25000 [01:36<07:56, 44.02it/s]

Ep 4000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 16%|█▋        | 4106/25000 [01:38<09:24, 37.02it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 17%|█▋        | 4203/25000 [01:41<10:20, 33.50it/s]

Ep 4200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 4


 17%|█▋        | 4307/25000 [01:43<07:47, 44.27it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


 18%|█▊        | 4407/25000 [01:46<07:54, 43.41it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 18%|█▊        | 4507/25000 [01:48<07:44, 44.13it/s]

Ep 4500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 18%|█▊        | 4607/25000 [01:50<07:35, 44.80it/s]

Ep 4600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 19%|█▉        | 4704/25000 [01:53<08:46, 38.55it/s]

Ep 4700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 19%|█▉        | 4804/25000 [01:55<09:58, 33.76it/s]

Ep 4800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 20%|█▉        | 4907/25000 [01:58<07:21, 45.51it/s]

Ep 4900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 20%|██        | 5007/25000 [02:00<07:37, 43.69it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 4


 20%|██        | 5107/25000 [02:02<07:33, 43.91it/s]

Ep 5100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 2


 21%|██        | 5207/25000 [02:05<07:12, 45.78it/s]

Ep 5200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.5, First Action 2


 21%|██        | 5307/25000 [02:07<09:02, 36.27it/s]

Ep 5300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 22%|██▏       | 5403/25000 [02:10<09:29, 34.39it/s]

Ep 5400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 22%|██▏       | 5506/25000 [02:12<07:15, 44.72it/s]

Ep 5500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 2


 22%|██▏       | 5606/25000 [02:15<07:13, 44.71it/s]

Ep 5600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 2


 23%|██▎       | 5706/25000 [02:17<07:12, 44.57it/s]

Ep 5700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 23%|██▎       | 5806/25000 [02:19<07:22, 43.35it/s]

Ep 5800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 24%|██▎       | 5905/25000 [02:22<08:20, 38.12it/s]

Ep 5900/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.8, First Action 2


 24%|██▍       | 6005/25000 [02:25<09:00, 35.14it/s]

Ep 6000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 2


 24%|██▍       | 6105/25000 [02:27<07:06, 44.33it/s]

Ep 6100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 2


 25%|██▍       | 6205/25000 [02:29<07:17, 42.96it/s]

Ep 6200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 25%|██▌       | 6305/25000 [02:32<07:02, 44.23it/s]

Ep 6300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 26%|██▌       | 6405/25000 [02:34<06:48, 45.57it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 2


 26%|██▌       | 6507/25000 [02:36<08:20, 36.94it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 2


 26%|██▋       | 6604/25000 [02:39<09:15, 33.14it/s]

Ep 6600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 2


 27%|██▋       | 6705/25000 [02:42<06:36, 46.16it/s]

Ep 6700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 2


 27%|██▋       | 6805/25000 [02:44<07:00, 43.30it/s]

Ep 6800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 28%|██▊       | 6905/25000 [02:46<06:54, 43.70it/s]

Ep 6900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 2


 28%|██▊       | 7005/25000 [02:48<06:39, 45.02it/s]

Ep 7000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.7, First Action 2


 28%|██▊       | 7106/25000 [02:51<08:08, 36.59it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 29%|██▉       | 7206/25000 [02:54<09:00, 32.92it/s]

Ep 7200/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.4, First Action 2


 29%|██▉       | 7305/25000 [02:56<06:52, 42.94it/s]

Ep 7300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 2


 30%|██▉       | 7405/25000 [02:58<06:35, 44.47it/s]

Ep 7400/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.3, First Action 2


 30%|███       | 7505/25000 [03:01<06:31, 44.67it/s]

Ep 7500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 2


 30%|███       | 7605/25000 [03:03<06:25, 45.13it/s]

Ep 7600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 2


 31%|███       | 7705/25000 [03:05<07:24, 38.94it/s]

Ep 7700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 31%|███       | 7805/25000 [03:08<08:20, 34.35it/s]

Ep 7800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 2


 32%|███▏      | 7905/25000 [03:11<08:01, 35.49it/s]

Ep 7900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 32%|███▏      | 8005/25000 [03:14<08:25, 33.61it/s]

Ep 8000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 2


 32%|███▏      | 8105/25000 [03:16<06:32, 43.00it/s]

Ep 8100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 2


 33%|███▎      | 8205/25000 [03:19<06:18, 44.39it/s]

Ep 8200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 2


 33%|███▎      | 8307/25000 [03:21<07:35, 36.64it/s]

Ep 8300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 34%|███▎      | 8407/25000 [03:24<08:12, 33.69it/s]

Ep 8400/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 2


 34%|███▍      | 8508/25000 [03:26<06:18, 43.58it/s]

Ep 8500/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 2


 34%|███▍      | 8608/25000 [03:29<06:03, 45.15it/s]

Ep 8600/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 14.2, First Action 2


 35%|███▍      | 8708/25000 [03:31<06:16, 43.26it/s]

Ep 8700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.2, First Action 2


 35%|███▌      | 8808/25000 [03:33<06:16, 42.98it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 2


 36%|███▌      | 8905/25000 [03:36<07:01, 38.18it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 2


 36%|███▌      | 9006/25000 [03:39<08:04, 33.00it/s]

Ep 9000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 2


 36%|███▋      | 9109/25000 [03:41<05:53, 44.91it/s]

Ep 9100/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.2, First Action 2


 37%|███▋      | 9209/25000 [03:43<05:55, 44.42it/s]

Ep 9200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.6, First Action 2


 37%|███▋      | 9309/25000 [03:45<05:57, 43.92it/s]

Ep 9300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 2


 38%|███▊      | 9409/25000 [03:48<05:43, 45.34it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 2


 38%|███▊      | 9505/25000 [03:50<07:16, 35.47it/s]

Ep 9500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 2


 38%|███▊      | 9606/25000 [03:53<07:31, 34.07it/s]

Ep 9600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 2


 39%|███▉      | 9707/25000 [03:55<05:34, 45.68it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 2


 39%|███▉      | 9807/25000 [03:58<05:38, 44.84it/s]

Ep 9800/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 14.5, First Action 2


 40%|███▉      | 9907/25000 [04:00<05:32, 45.40it/s]

Ep 9900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 2


 40%|████      | 10007/25000 [04:02<05:43, 43.62it/s]

Ep 10000/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 2


 40%|████      | 10105/25000 [04:05<06:35, 37.66it/s]

Ep 10100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.2, First Action 2


 41%|████      | 10206/25000 [04:07<07:16, 33.89it/s]

Ep 10200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 2


 41%|████      | 10304/25000 [04:10<05:35, 43.77it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 2


 42%|████▏     | 10409/25000 [04:12<05:25, 44.79it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 2


 42%|████▏     | 10509/25000 [04:15<05:17, 45.58it/s]

Ep 10500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 2


 42%|████▏     | 10609/25000 [04:17<05:24, 44.30it/s]

Ep 10600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 43%|████▎     | 10704/25000 [04:19<05:37, 42.38it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 4


 43%|████▎     | 10804/25000 [04:22<06:36, 35.79it/s]

Ep 10800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 44%|████▎     | 10905/25000 [04:24<05:17, 44.34it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 44%|████▍     | 11005/25000 [04:27<05:08, 45.43it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 44%|████▍     | 11105/25000 [04:29<05:06, 45.27it/s]

Ep 11100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 45%|████▍     | 11205/25000 [04:31<05:12, 44.15it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.5, First Action 4


 45%|████▌     | 11305/25000 [04:33<05:15, 43.40it/s]

Ep 11300/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 4


 46%|████▌     | 11406/25000 [04:36<06:35, 34.39it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 46%|████▌     | 11506/25000 [04:39<05:08, 43.80it/s]

Ep 11500/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.6, First Action 4


 46%|████▋     | 11606/25000 [04:41<05:03, 44.08it/s]

Ep 11600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.0, First Action 4


 47%|████▋     | 11706/25000 [04:43<04:58, 44.47it/s]

Ep 11700/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.1, First Action 4


 47%|████▋     | 11806/25000 [04:46<04:51, 45.29it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 48%|████▊     | 11906/25000 [04:48<04:49, 45.23it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 48%|████▊     | 12006/25000 [04:51<05:53, 36.77it/s]

Ep 12000/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.0, First Action 4


 48%|████▊     | 12105/25000 [04:53<04:55, 43.62it/s]

Ep 12100/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.6, First Action 4


 49%|████▉     | 12205/25000 [04:56<04:45, 44.87it/s]

Ep 12200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 49%|████▉     | 12305/25000 [04:58<04:43, 44.86it/s]

Ep 12300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 50%|████▉     | 12405/25000 [05:00<04:41, 44.77it/s]

Ep 12400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 50%|█████     | 12505/25000 [05:02<04:41, 44.39it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 4


 50%|█████     | 12604/25000 [05:05<05:45, 35.85it/s]

Ep 12600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 51%|█████     | 12706/25000 [05:08<04:41, 43.69it/s]

Ep 12700/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.5, First Action 4


 51%|█████     | 12806/25000 [05:10<04:36, 44.03it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 52%|█████▏    | 12906/25000 [05:12<04:30, 44.68it/s]

Ep 12900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 4


 52%|█████▏    | 13006/25000 [05:15<04:23, 45.57it/s]

Ep 13000/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.4, First Action 4


 52%|█████▏    | 13106/25000 [05:17<04:22, 45.28it/s]

Ep 13100/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.6, First Action 4


 53%|█████▎    | 13207/25000 [05:20<05:34, 35.30it/s]

Ep 13200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 4


 53%|█████▎    | 13308/25000 [05:23<04:33, 42.80it/s]

Ep 13300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 54%|█████▎    | 13408/25000 [05:25<04:20, 44.46it/s]

Ep 13400/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 4


 54%|█████▍    | 13508/25000 [05:27<04:25, 43.34it/s]

Ep 13500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 54%|█████▍    | 13608/25000 [05:29<04:14, 44.70it/s]

Ep 13600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 55%|█████▍    | 13708/25000 [05:32<04:21, 43.17it/s]

Ep 13700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 55%|█████▌    | 13804/25000 [05:34<05:30, 33.88it/s]

Ep 13800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 56%|█████▌    | 13907/25000 [05:37<04:37, 39.92it/s]

Ep 13900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 56%|█████▌    | 14006/25000 [05:39<04:09, 44.06it/s]

Ep 14000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 56%|█████▋    | 14106/25000 [05:42<03:59, 45.42it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 4


 57%|█████▋    | 14206/25000 [05:44<04:05, 43.94it/s]

Ep 14200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.0, First Action 4


 57%|█████▋    | 14306/25000 [05:46<04:05, 43.48it/s]

Ep 14300/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 58%|█████▊    | 14405/25000 [05:49<04:48, 36.79it/s]

Ep 14400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 58%|█████▊    | 14509/25000 [05:52<04:18, 40.53it/s]

Ep 14500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.5, First Action 4


 58%|█████▊    | 14609/25000 [05:54<03:55, 44.15it/s]

Ep 14600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 4


 59%|█████▉    | 14709/25000 [05:56<03:48, 44.98it/s]

Ep 14700/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 4


 59%|█████▉    | 14809/25000 [05:59<03:44, 45.33it/s]

Ep 14800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.1, First Action 4


 60%|█████▉    | 14904/25000 [06:01<03:49, 44.08it/s]

Ep 14900/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 4


 60%|██████    | 15004/25000 [06:03<04:23, 37.90it/s]

Ep 15000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 4


 60%|██████    | 15105/25000 [06:06<04:38, 35.51it/s]

Ep 15100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 4


 61%|██████    | 15207/25000 [06:09<03:40, 44.33it/s]

Ep 15200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 4


 61%|██████    | 15307/25000 [06:11<03:36, 44.77it/s]

Ep 15300/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 4


 62%|██████▏   | 15407/25000 [06:13<03:39, 43.69it/s]

Ep 15400/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.9, First Action 4


 62%|██████▏   | 15507/25000 [06:15<03:36, 43.89it/s]

Ep 15500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 62%|██████▏   | 15607/25000 [06:18<04:18, 36.31it/s]

Ep 15600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 4


 63%|██████▎   | 15704/25000 [06:21<04:36, 33.64it/s]

Ep 15700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 63%|██████▎   | 15808/25000 [06:23<03:27, 44.30it/s]

Ep 15800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 4


 64%|██████▎   | 15908/25000 [06:26<03:21, 45.20it/s]

Ep 15900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 64%|██████▍   | 16008/25000 [06:28<03:21, 44.71it/s]

Ep 16000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 64%|██████▍   | 16108/25000 [06:30<03:26, 43.15it/s]

Ep 16100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 65%|██████▍   | 16205/25000 [06:32<03:54, 37.45it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 65%|██████▌   | 16305/25000 [06:35<04:29, 32.32it/s]

Ep 16300/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.8, First Action 4


 66%|██████▌   | 16407/25000 [06:38<03:11, 44.77it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 4


 66%|██████▌   | 16507/25000 [06:40<03:12, 44.05it/s]

Ep 16500/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.3, First Action 4


 66%|██████▋   | 16607/25000 [06:42<03:09, 44.38it/s]

Ep 16600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 4


 67%|██████▋   | 16707/25000 [06:45<03:09, 43.87it/s]

Ep 16700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 67%|██████▋   | 16807/25000 [06:47<03:40, 37.15it/s]

Ep 16800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 4


 68%|██████▊   | 16904/25000 [06:50<04:19, 31.16it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 68%|██████▊   | 17009/25000 [06:53<02:56, 45.21it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 68%|██████▊   | 17109/25000 [06:55<02:53, 45.53it/s]

Ep 17100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 69%|██████▉   | 17209/25000 [06:57<02:56, 44.15it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 69%|██████▉   | 17304/25000 [06:59<02:57, 43.45it/s]

Ep 17300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 70%|██████▉   | 17406/25000 [07:02<03:30, 35.99it/s]

Ep 17400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 4


 70%|███████   | 17506/25000 [07:05<03:47, 32.96it/s]

Ep 17500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 4


 70%|███████   | 17605/25000 [07:07<02:47, 44.09it/s]

Ep 17600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.3, First Action 4


 71%|███████   | 17705/25000 [07:09<02:45, 44.05it/s]

Ep 17700/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 4


 71%|███████   | 17805/25000 [07:12<03:23, 35.42it/s]

Ep 17800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.9, First Action 4


 72%|███████▏  | 17905/25000 [07:15<03:25, 34.48it/s]

Ep 17900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 4


 72%|███████▏  | 18005/25000 [07:18<03:16, 35.51it/s]

Ep 18000/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.7, First Action 4


 72%|███████▏  | 18105/25000 [07:20<03:23, 33.89it/s]

Ep 18100/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 4


 73%|███████▎  | 18207/25000 [07:23<02:32, 44.61it/s]

Ep 18200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 4


 73%|███████▎  | 18307/25000 [07:25<02:31, 44.20it/s]

Ep 18300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 74%|███████▎  | 18407/25000 [07:27<02:30, 43.90it/s]

Ep 18400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 4


 74%|███████▍  | 18507/25000 [07:30<02:24, 44.89it/s]

Ep 18500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 74%|███████▍  | 18607/25000 [07:32<03:01, 35.27it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 75%|███████▍  | 18707/25000 [07:35<03:03, 34.28it/s]

Ep 18700/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 4


 75%|███████▌  | 18809/25000 [07:38<02:18, 44.59it/s]

Ep 18800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 76%|███████▌  | 18909/25000 [07:40<02:16, 44.68it/s]

Ep 18900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 4


 76%|███████▌  | 19004/25000 [07:42<02:13, 45.05it/s]

Ep 19000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 76%|███████▋  | 19109/25000 [07:44<02:16, 43.21it/s]

Ep 19100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 77%|███████▋  | 19205/25000 [07:47<02:44, 35.28it/s]

Ep 19200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 4


 77%|███████▋  | 19305/25000 [07:50<02:43, 34.81it/s]

Ep 19300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 78%|███████▊  | 19405/25000 [07:52<02:07, 43.90it/s]

Ep 19400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 78%|███████▊  | 19505/25000 [07:54<02:03, 44.34it/s]

Ep 19500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 4


 78%|███████▊  | 19605/25000 [07:57<02:01, 44.52it/s]

Ep 19600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 79%|███████▉  | 19705/25000 [07:59<01:58, 44.83it/s]

Ep 19700/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 4


 79%|███████▉  | 19807/25000 [08:01<02:27, 35.31it/s]

Ep 19800/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.7, First Action 4


 80%|███████▉  | 19904/25000 [08:04<02:41, 31.48it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 4


 80%|████████  | 20007/25000 [08:07<01:52, 44.40it/s]

Ep 20000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 80%|████████  | 20107/25000 [08:09<01:52, 43.37it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 81%|████████  | 20207/25000 [08:11<01:47, 44.68it/s]

Ep 20200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.4, First Action 4


 81%|████████  | 20307/25000 [08:14<01:48, 43.22it/s]

Ep 20300/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 82%|████████▏ | 20405/25000 [08:16<02:16, 33.69it/s]

Ep 20400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 82%|████████▏ | 20506/25000 [08:19<02:12, 34.01it/s]

Ep 20500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 4


 82%|████████▏ | 20608/25000 [08:22<01:38, 44.48it/s]

Ep 20600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 83%|████████▎ | 20708/25000 [08:24<01:35, 44.88it/s]

Ep 20700/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.1, First Action 4


 83%|████████▎ | 20808/25000 [08:26<01:34, 44.54it/s]

Ep 20800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 84%|████████▎ | 20908/25000 [08:28<01:34, 43.14it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 84%|████████▍ | 21007/25000 [08:31<01:49, 36.57it/s]

Ep 21000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 84%|████████▍ | 21105/25000 [08:34<02:03, 31.56it/s]

Ep 21100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 85%|████████▍ | 21207/25000 [08:36<01:23, 45.32it/s]

Ep 21200/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.9, First Action 4


 85%|████████▌ | 21307/25000 [08:38<01:21, 45.34it/s]

Ep 21300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 4


 86%|████████▌ | 21407/25000 [08:41<01:20, 44.78it/s]

Ep 21400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 86%|████████▌ | 21507/25000 [08:43<01:18, 44.72it/s]

Ep 21500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 4


 86%|████████▋ | 21604/25000 [08:45<01:37, 34.88it/s]

Ep 21600/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.1, First Action 4


 87%|████████▋ | 21704/25000 [08:48<01:40, 32.88it/s]

Ep 21700/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 4


 87%|████████▋ | 21805/25000 [08:51<01:11, 44.76it/s]

Ep 21800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 88%|████████▊ | 21905/25000 [08:53<01:08, 44.86it/s]

Ep 21900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 88%|████████▊ | 22005/25000 [08:55<01:07, 44.25it/s]

Ep 22000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 4


 88%|████████▊ | 22105/25000 [08:57<01:05, 44.31it/s]

Ep 22100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 89%|████████▉ | 22206/25000 [09:00<01:19, 35.36it/s]

Ep 22200/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 4


 89%|████████▉ | 22306/25000 [09:03<01:22, 32.63it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 4


 90%|████████▉ | 22409/25000 [09:05<00:57, 44.85it/s]

Ep 22400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.9, First Action 4


 90%|█████████ | 22509/25000 [09:08<00:55, 45.24it/s]

Ep 22500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.8, First Action 4


 90%|█████████ | 22604/25000 [09:10<00:53, 44.57it/s]

Ep 22600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 4


 91%|█████████ | 22704/25000 [09:12<00:53, 42.92it/s]

Ep 22700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 4


 91%|█████████ | 22804/25000 [09:14<00:57, 38.23it/s]

Ep 22800/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.8, First Action 4


 92%|█████████▏| 22905/25000 [09:17<01:01, 34.18it/s]

Ep 22900/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.8, First Action 4


 92%|█████████▏| 23007/25000 [09:20<00:44, 44.61it/s]

Ep 23000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 4


 92%|█████████▏| 23107/25000 [09:22<00:43, 43.52it/s]

Ep 23100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 4


 93%|█████████▎| 23207/25000 [09:24<00:40, 43.94it/s]

Ep 23200/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 4


 93%|█████████▎| 23307/25000 [09:27<00:37, 44.71it/s]

Ep 23300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 94%|█████████▎| 23406/25000 [09:29<00:44, 36.20it/s]

Ep 23400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 4


 94%|█████████▍| 23506/25000 [09:32<00:45, 32.61it/s]

Ep 23500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 4


 94%|█████████▍| 23606/25000 [09:35<00:31, 44.20it/s]

Ep 23600/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 4


 95%|█████████▍| 23706/25000 [09:37<00:28, 44.63it/s]

Ep 23700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 95%|█████████▌| 23806/25000 [09:39<00:27, 43.49it/s]

Ep 23800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 4


 96%|█████████▌| 23906/25000 [09:42<00:24, 45.21it/s]

Ep 23900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 96%|█████████▌| 24006/25000 [09:44<00:25, 39.13it/s]

Ep 24000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 96%|█████████▋| 24106/25000 [09:47<00:26, 34.25it/s]

Ep 24100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4


 97%|█████████▋| 24208/25000 [09:49<00:17, 44.64it/s]

Ep 24200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 97%|█████████▋| 24308/25000 [09:52<00:15, 44.03it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 4


 98%|█████████▊| 24408/25000 [09:54<00:13, 43.05it/s]

Ep 24400/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.6, First Action 4


 98%|█████████▊| 24508/25000 [09:56<00:11, 43.36it/s]

Ep 24500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 4


 98%|█████████▊| 24603/25000 [09:58<00:09, 40.58it/s]

Ep 24600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 4


 99%|█████████▉| 24704/25000 [10:01<00:08, 34.79it/s]

Ep 24700/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.1, First Action 4


 99%|█████████▉| 24804/25000 [10:04<00:04, 42.79it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 4


100%|█████████▉| 24909/25000 [10:06<00:02, 44.33it/s]

Ep 24900/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 4


100%|██████████| 25000/25000 [10:09<00:00, 41.05it/s]


Ep 25000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 4

TEST:


 43%|████▎     | 128/300 [00:00<00:01, 141.10it/s]

Ep 100/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 4


 73%|███████▎  | 218/300 [00:01<00:00, 141.44it/s]

Ep 200/300, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.4, First Action 4


100%|██████████| 300/300 [00:02<00:00, 137.79it/s]


Ep 300/300, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 4

GAMMA 1 - LR 0.0001 - Entropy Decay True


  0%|          | 106/25000 [00:02<11:10, 37.10it/s]

Ep 100/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.6, First Action 9


  1%|          | 204/25000 [00:05<12:28, 33.11it/s]

Ep 200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 6


  1%|          | 308/25000 [00:08<09:36, 42.81it/s]

Ep 300/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 18.0, First Action 10


  2%|▏         | 408/25000 [00:10<09:19, 43.97it/s]

Ep 400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  2%|▏         | 508/25000 [00:12<09:10, 44.48it/s]

Ep 500/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 0


  2%|▏         | 608/25000 [00:14<09:01, 45.03it/s]

Ep 600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 3


  3%|▎         | 708/25000 [00:17<09:40, 41.85it/s]

Ep 700/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 17.9, First Action 2


  3%|▎         | 805/25000 [00:19<11:06, 36.31it/s]

Ep 800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  4%|▎         | 908/25000 [00:22<09:09, 43.88it/s]

Ep 900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  4%|▍         | 1008/25000 [00:25<09:09, 43.65it/s]

Ep 1000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


  4%|▍         | 1108/25000 [00:27<08:58, 44.38it/s]

Ep 1100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


  5%|▍         | 1208/25000 [00:29<09:14, 42.89it/s]

Ep 1200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


  5%|▌         | 1308/25000 [00:31<08:58, 44.03it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 8


  6%|▌         | 1406/25000 [00:34<10:29, 37.51it/s]

Ep 1400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 3


  6%|▌         | 1507/25000 [00:37<09:17, 42.18it/s]

Ep 1500/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 2


  6%|▋         | 1607/25000 [00:39<08:53, 43.81it/s]

Ep 1600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  7%|▋         | 1707/25000 [00:42<09:05, 42.71it/s]

Ep 1700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 2


  7%|▋         | 1807/25000 [00:44<08:45, 44.12it/s]

Ep 1800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


  8%|▊         | 1907/25000 [00:46<08:46, 43.88it/s]

Ep 1900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


  8%|▊         | 2006/25000 [00:49<10:37, 36.08it/s]

Ep 2000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


  8%|▊         | 2107/25000 [00:52<09:08, 41.72it/s]

Ep 2100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


  9%|▉         | 2207/25000 [00:54<09:07, 41.65it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  9%|▉         | 2307/25000 [00:56<08:33, 44.20it/s]

Ep 2300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 10%|▉         | 2407/25000 [00:59<08:41, 43.30it/s]

Ep 2400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 2


 10%|█         | 2507/25000 [01:01<08:34, 43.73it/s]

Ep 2500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 10%|█         | 2605/25000 [01:04<11:35, 32.18it/s]

Ep 2600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 6


 11%|█         | 2705/25000 [01:07<11:13, 33.10it/s]

Ep 2700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 11%|█         | 2805/25000 [01:10<11:41, 31.66it/s]

Ep 2800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 2


 12%|█▏        | 2904/25000 [01:12<08:26, 43.65it/s]

Ep 2900/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 7


 12%|█▏        | 3009/25000 [01:15<08:10, 44.82it/s]

Ep 3000/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 17.9, First Action 10


 12%|█▏        | 3104/25000 [01:17<08:16, 44.13it/s]

Ep 3100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 13%|█▎        | 3209/25000 [01:19<08:11, 44.31it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 13%|█▎        | 3307/25000 [01:21<09:48, 36.86it/s]

Ep 3300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 14%|█▎        | 3407/25000 [01:24<10:31, 34.19it/s]

Ep 3400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 5


 14%|█▍        | 3506/25000 [01:27<08:12, 43.67it/s]

Ep 3500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 0


 14%|█▍        | 3606/25000 [01:29<08:04, 44.18it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 5


 15%|█▍        | 3706/25000 [01:31<07:52, 45.06it/s]

Ep 3700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 15%|█▌        | 3806/25000 [01:34<07:58, 44.33it/s]

Ep 3800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 5


 16%|█▌        | 3907/25000 [01:36<09:23, 37.41it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 16%|█▌        | 4003/25000 [01:39<10:17, 33.98it/s]

Ep 4000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 16%|█▋        | 4108/25000 [01:41<07:46, 44.77it/s]

Ep 4100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 17%|█▋        | 4208/25000 [01:44<07:53, 43.95it/s]

Ep 4200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 17%|█▋        | 4308/25000 [01:46<07:41, 44.88it/s]

Ep 4300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 18%|█▊        | 4408/25000 [01:48<07:42, 44.50it/s]

Ep 4400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 2


 18%|█▊        | 4506/25000 [01:50<09:09, 37.29it/s]

Ep 4500/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.1, First Action 10


 18%|█▊        | 4603/25000 [01:53<09:45, 34.85it/s]

Ep 4600/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 5


 19%|█▉        | 4707/25000 [01:56<07:45, 43.55it/s]

Ep 4700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 19%|█▉        | 4807/25000 [01:58<07:36, 44.23it/s]

Ep 4800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 20%|█▉        | 4907/25000 [02:00<07:26, 44.99it/s]

Ep 4900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 20%|██        | 5007/25000 [02:03<07:25, 44.88it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 20%|██        | 5107/25000 [02:05<07:26, 44.60it/s]

Ep 5100/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 2


 21%|██        | 5207/25000 [02:08<09:26, 34.92it/s]

Ep 5200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 4


 21%|██        | 5305/25000 [02:10<07:32, 43.55it/s]

Ep 5300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 22%|██▏       | 5405/25000 [02:13<07:28, 43.65it/s]

Ep 5400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 22%|██▏       | 5505/25000 [02:15<07:25, 43.78it/s]

Ep 5500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 22%|██▏       | 5605/25000 [02:17<07:09, 45.14it/s]

Ep 5600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 23%|██▎       | 5705/25000 [02:19<07:05, 45.38it/s]

Ep 5700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 23%|██▎       | 5806/25000 [02:22<08:24, 38.01it/s]

Ep 5800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 24%|██▎       | 5908/25000 [02:25<08:22, 37.98it/s]

Ep 5900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


 24%|██▍       | 6007/25000 [02:27<07:07, 44.39it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 24%|██▍       | 6107/25000 [02:30<06:55, 45.48it/s]

Ep 6100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 25%|██▍       | 6207/25000 [02:32<07:09, 43.78it/s]

Ep 6200/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 25%|██▌       | 6307/25000 [02:34<06:56, 44.90it/s]

Ep 6300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.2, First Action 8


 26%|██▌       | 6404/25000 [02:37<08:48, 35.22it/s]

Ep 6400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 26%|██▌       | 6504/25000 [02:39<09:04, 33.96it/s]

Ep 6500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 26%|██▋       | 6607/25000 [02:42<07:03, 43.47it/s]

Ep 6600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


 27%|██▋       | 6707/25000 [02:44<06:54, 44.14it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 27%|██▋       | 6807/25000 [02:46<06:43, 45.04it/s]

Ep 6800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 28%|██▊       | 6907/25000 [02:49<06:36, 45.67it/s]

Ep 6900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 28%|██▊       | 7005/25000 [02:51<08:20, 35.96it/s]

Ep 7000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 28%|██▊       | 7105/25000 [02:54<08:42, 34.23it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 29%|██▉       | 7205/25000 [02:56<06:40, 44.44it/s]

Ep 7200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 29%|██▉       | 7305/25000 [02:59<06:40, 44.15it/s]

Ep 7300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 30%|██▉       | 7405/25000 [03:01<06:36, 44.40it/s]

Ep 7400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 30%|███       | 7505/25000 [03:03<06:31, 44.74it/s]

Ep 7500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 30%|███       | 7603/25000 [03:06<08:30, 34.11it/s]

Ep 7600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 31%|███       | 7704/25000 [03:09<09:11, 31.35it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 31%|███       | 7807/25000 [03:11<06:29, 44.14it/s]

Ep 7800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 32%|███▏      | 7907/25000 [03:13<06:16, 45.35it/s]

Ep 7900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 32%|███▏      | 8007/25000 [03:16<06:20, 44.65it/s]

Ep 8000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 4


 32%|███▏      | 8107/25000 [03:18<06:21, 44.29it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 33%|███▎      | 8206/25000 [03:20<08:00, 34.95it/s]

Ep 8200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 33%|███▎      | 8306/25000 [03:23<08:39, 32.11it/s]

Ep 8300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 34%|███▎      | 8406/25000 [03:26<06:20, 43.58it/s]

Ep 8400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 34%|███▍      | 8506/25000 [03:28<06:10, 44.51it/s]

Ep 8500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 34%|███▍      | 8606/25000 [03:30<06:02, 45.26it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 35%|███▍      | 8706/25000 [03:33<06:05, 44.64it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 35%|███▌      | 8806/25000 [03:35<07:40, 35.19it/s]

Ep 8800/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 5


 36%|███▌      | 8906/25000 [03:38<07:34, 35.41it/s]

Ep 8900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 36%|███▌      | 9007/25000 [03:40<05:58, 44.61it/s]

Ep 9000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 36%|███▋      | 9107/25000 [03:43<06:01, 44.00it/s]

Ep 9100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


 37%|███▋      | 9207/25000 [03:45<05:59, 43.94it/s]

Ep 9200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 37%|███▋      | 9307/25000 [03:47<05:42, 45.85it/s]

Ep 9300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 38%|███▊      | 9406/25000 [03:50<06:57, 37.32it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 4


 38%|███▊      | 9503/25000 [03:52<07:59, 32.33it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 38%|███▊      | 9606/25000 [03:55<05:39, 45.30it/s]

Ep 9600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 39%|███▉      | 9706/25000 [03:57<05:46, 44.14it/s]

Ep 9700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 39%|███▉      | 9806/25000 [04:00<05:40, 44.64it/s]

Ep 9800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 40%|███▉      | 9906/25000 [04:02<05:45, 43.71it/s]

Ep 9900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 40%|████      | 10004/25000 [04:04<06:41, 37.38it/s]

Ep 10000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 4


 40%|████      | 10105/25000 [04:07<07:05, 35.04it/s]

Ep 10100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


 41%|████      | 10206/25000 [04:10<05:41, 43.36it/s]

Ep 10200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 41%|████      | 10306/25000 [04:12<05:44, 42.71it/s]

Ep 10300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 42%|████▏     | 10406/25000 [04:14<05:23, 45.17it/s]

Ep 10400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 42%|████▏     | 10506/25000 [04:17<05:24, 44.63it/s]

Ep 10500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 4


 42%|████▏     | 10606/25000 [04:19<06:15, 38.38it/s]

Ep 10600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 43%|████▎     | 10706/25000 [04:22<06:56, 34.35it/s]

Ep 10700/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 4


 43%|████▎     | 10807/25000 [04:24<05:23, 43.91it/s]

Ep 10800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 44%|████▎     | 10907/25000 [04:27<05:20, 43.93it/s]

Ep 10900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 4


 44%|████▍     | 11007/25000 [04:29<05:23, 43.26it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 44%|████▍     | 11107/25000 [04:31<05:23, 42.95it/s]

Ep 11100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 45%|████▍     | 11206/25000 [04:34<06:09, 37.35it/s]

Ep 11200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 8


 45%|████▌     | 11303/25000 [04:36<06:27, 35.33it/s]

Ep 11300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 46%|████▌     | 11407/25000 [04:39<05:00, 45.18it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 46%|████▌     | 11507/25000 [04:42<05:09, 43.54it/s]

Ep 11500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.8, First Action 3


 46%|████▋     | 11607/25000 [04:44<05:08, 43.42it/s]

Ep 11600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 47%|████▋     | 11707/25000 [04:46<04:59, 44.43it/s]

Ep 11700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 47%|████▋     | 11807/25000 [04:48<05:06, 43.00it/s]

Ep 11800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 48%|████▊     | 11904/25000 [04:51<05:59, 36.41it/s]

Ep 11900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 48%|████▊     | 12009/25000 [04:54<04:59, 43.42it/s]

Ep 12000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 48%|████▊     | 12104/25000 [04:56<04:53, 43.87it/s]

Ep 12100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 49%|████▉     | 12209/25000 [04:59<04:49, 44.20it/s]

Ep 12200/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.6, First Action 0


 49%|████▉     | 12309/25000 [05:01<04:55, 42.96it/s]

Ep 12300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 50%|████▉     | 12403/25000 [05:04<06:28, 32.43it/s]

Ep 12400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 50%|█████     | 12504/25000 [05:07<05:52, 35.41it/s]

Ep 12500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 50%|█████     | 12604/25000 [05:09<06:22, 32.44it/s]

Ep 12600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 51%|█████     | 12707/25000 [05:12<04:32, 45.04it/s]

Ep 12700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 51%|█████     | 12807/25000 [05:14<04:39, 43.70it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 52%|█████▏    | 12907/25000 [05:17<04:40, 43.07it/s]

Ep 12900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 52%|█████▏    | 13007/25000 [05:19<04:34, 43.68it/s]

Ep 13000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 5


 52%|█████▏    | 13106/25000 [05:21<05:33, 35.68it/s]

Ep 13100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 53%|█████▎    | 13203/25000 [05:24<05:45, 34.12it/s]

Ep 13200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 53%|█████▎    | 13304/25000 [05:27<04:35, 42.38it/s]

Ep 13300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 54%|█████▎    | 13409/25000 [05:29<04:20, 44.56it/s]

Ep 13400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 7


 54%|█████▍    | 13509/25000 [05:31<04:17, 44.62it/s]

Ep 13500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 54%|█████▍    | 13604/25000 [05:34<04:10, 45.55it/s]

Ep 13600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 55%|█████▍    | 13704/25000 [05:36<04:57, 37.91it/s]

Ep 13700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 55%|█████▌    | 13804/25000 [05:39<05:32, 33.67it/s]

Ep 13800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 5


 56%|█████▌    | 13908/25000 [05:42<04:13, 43.73it/s]

Ep 13900/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 4


 56%|█████▌    | 14008/25000 [05:44<04:09, 44.05it/s]

Ep 14000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 56%|█████▋    | 14108/25000 [05:46<04:11, 43.39it/s]

Ep 14100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 57%|█████▋    | 14208/25000 [05:48<04:03, 44.25it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 57%|█████▋    | 14303/25000 [05:51<04:26, 40.09it/s]

Ep 14300/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 1


 58%|█████▊    | 14404/25000 [05:54<04:43, 37.33it/s]

Ep 14400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 58%|█████▊    | 14508/25000 [05:56<03:55, 44.57it/s]

Ep 14500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 58%|█████▊    | 14608/25000 [05:59<03:54, 44.23it/s]

Ep 14600/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.2, First Action 0


 59%|█████▉    | 14708/25000 [06:01<03:50, 44.72it/s]

Ep 14700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 59%|█████▉    | 14808/25000 [06:03<03:51, 44.05it/s]

Ep 14800/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 3


 60%|█████▉    | 14908/25000 [06:06<03:48, 44.14it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 60%|██████    | 15006/25000 [06:08<04:30, 37.00it/s]

Ep 15000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 60%|██████    | 15107/25000 [06:11<03:49, 43.07it/s]

Ep 15100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 61%|██████    | 15207/25000 [06:13<03:37, 45.03it/s]

Ep 15200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 61%|██████    | 15307/25000 [06:15<03:38, 44.37it/s]

Ep 15300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 62%|██████▏   | 15407/25000 [06:18<03:45, 42.60it/s]

Ep 15400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 62%|██████▏   | 15507/25000 [06:20<03:34, 44.36it/s]

Ep 15500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 62%|██████▏   | 15604/25000 [06:23<04:30, 34.72it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 63%|██████▎   | 15704/25000 [06:26<03:41, 42.05it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 1


 63%|██████▎   | 15809/25000 [06:28<03:30, 43.61it/s]

Ep 15800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 1


 64%|██████▎   | 15904/25000 [06:30<03:22, 44.84it/s]

Ep 15900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 64%|██████▍   | 16009/25000 [06:33<03:23, 44.17it/s]

Ep 16000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 64%|██████▍   | 16104/25000 [06:35<03:23, 43.61it/s]

Ep 16100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 65%|██████▍   | 16207/25000 [06:37<04:00, 36.59it/s]

Ep 16200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 6


 65%|██████▌   | 16303/25000 [06:40<04:13, 34.25it/s]

Ep 16300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 66%|██████▌   | 16405/25000 [06:43<03:13, 44.35it/s]

Ep 16400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 66%|██████▌   | 16505/25000 [06:45<03:15, 43.42it/s]

Ep 16500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 66%|██████▋   | 16605/25000 [06:47<03:08, 44.45it/s]

Ep 16600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 6


 67%|██████▋   | 16705/25000 [06:49<03:05, 44.63it/s]

Ep 16700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 67%|██████▋   | 16804/25000 [06:52<03:47, 36.07it/s]

Ep 16800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 68%|██████▊   | 16904/25000 [06:55<03:54, 34.54it/s]

Ep 16900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 68%|██████▊   | 17008/25000 [06:57<02:58, 44.72it/s]

Ep 17000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 68%|██████▊   | 17108/25000 [07:00<02:56, 44.75it/s]

Ep 17100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 69%|██████▉   | 17208/25000 [07:02<02:59, 43.32it/s]

Ep 17200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 69%|██████▉   | 17308/25000 [07:04<02:55, 43.85it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 0


 70%|██████▉   | 17406/25000 [07:07<03:35, 35.26it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 70%|███████   | 17504/25000 [07:09<03:35, 34.72it/s]

Ep 17500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 7


 70%|███████   | 17605/25000 [07:12<02:45, 44.76it/s]

Ep 17600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 71%|███████   | 17705/25000 [07:14<02:47, 43.68it/s]

Ep 17700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 71%|███████   | 17805/25000 [07:16<02:40, 44.88it/s]

Ep 17800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 72%|███████▏  | 17905/25000 [07:19<02:40, 44.29it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 0


 72%|███████▏  | 18007/25000 [07:21<03:10, 36.73it/s]

Ep 18000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 5


 72%|███████▏  | 18107/25000 [07:24<03:24, 33.69it/s]

Ep 18100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 0


 73%|███████▎  | 18207/25000 [07:27<02:35, 43.58it/s]

Ep 18200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 73%|███████▎  | 18307/25000 [07:29<02:36, 42.64it/s]

Ep 18300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 74%|███████▎  | 18407/25000 [07:31<02:28, 44.43it/s]

Ep 18400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 74%|███████▍  | 18507/25000 [07:33<02:22, 45.46it/s]

Ep 18500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 74%|███████▍  | 18602/25000 [07:36<02:32, 41.98it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 75%|███████▍  | 18704/25000 [07:38<02:48, 37.41it/s]

Ep 18700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 75%|███████▌  | 18807/25000 [07:41<02:22, 43.54it/s]

Ep 18800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 76%|███████▌  | 18907/25000 [07:44<02:17, 44.35it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 76%|███████▌  | 19007/25000 [07:46<02:15, 44.27it/s]

Ep 19000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 76%|███████▋  | 19107/25000 [07:48<02:11, 44.75it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 77%|███████▋  | 19207/25000 [07:50<02:05, 46.07it/s]

Ep 19200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 77%|███████▋  | 19307/25000 [07:53<02:32, 37.28it/s]

Ep 19300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 78%|███████▊  | 19405/25000 [07:56<02:12, 42.30it/s]

Ep 19400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 78%|███████▊  | 19505/25000 [07:58<02:01, 45.28it/s]

Ep 19500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 78%|███████▊  | 19605/25000 [08:00<02:00, 44.66it/s]

Ep 19600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 79%|███████▉  | 19705/25000 [08:03<01:59, 44.48it/s]

Ep 19700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 79%|███████▉  | 19805/25000 [08:05<01:57, 44.39it/s]

Ep 19800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 80%|███████▉  | 19905/25000 [08:08<02:23, 35.55it/s]

Ep 19900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 5


 80%|████████  | 20005/25000 [08:11<02:00, 41.32it/s]

Ep 20000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 80%|████████  | 20105/25000 [08:13<01:51, 43.76it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 3


 81%|████████  | 20205/25000 [08:15<01:49, 43.90it/s]

Ep 20200/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 0


 81%|████████  | 20305/25000 [08:17<01:46, 44.12it/s]

Ep 20300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 82%|████████▏ | 20405/25000 [08:20<01:47, 42.59it/s]

Ep 20400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 82%|████████▏ | 20507/25000 [08:22<02:01, 37.08it/s]

Ep 20500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 82%|████████▏ | 20605/25000 [08:25<02:02, 35.83it/s]

Ep 20600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 83%|████████▎ | 20704/25000 [08:27<01:36, 44.49it/s]

Ep 20700/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.3, First Action 4


 83%|████████▎ | 20809/25000 [08:30<01:34, 44.17it/s]

Ep 20800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 84%|████████▎ | 20904/25000 [08:32<01:33, 43.89it/s]

Ep 20900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 0


 84%|████████▍ | 21009/25000 [08:34<01:30, 44.28it/s]

Ep 21000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 84%|████████▍ | 21104/25000 [08:37<01:47, 36.33it/s]

Ep 21100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 85%|████████▍ | 21205/25000 [08:40<01:29, 42.50it/s]

Ep 21200/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.8, First Action 0


 85%|████████▌ | 21305/25000 [08:42<01:22, 44.61it/s]

Ep 21300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 19.0, First Action 0


 86%|████████▌ | 21405/25000 [08:44<01:19, 45.11it/s]

Ep 21400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 86%|████████▌ | 21505/25000 [08:47<01:21, 43.10it/s]

Ep 21500/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.5, First Action 4


 86%|████████▋ | 21605/25000 [08:49<01:17, 43.90it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 0


 87%|████████▋ | 21706/25000 [08:52<01:32, 35.63it/s]

Ep 21700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 87%|████████▋ | 21807/25000 [08:55<01:16, 41.86it/s]

Ep 21800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 88%|████████▊ | 21907/25000 [08:57<01:08, 45.06it/s]

Ep 21900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 88%|████████▊ | 22007/25000 [08:59<01:09, 43.05it/s]

Ep 22000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 88%|████████▊ | 22104/25000 [09:02<01:21, 35.48it/s]

Ep 22100/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 0


 89%|████████▉ | 22204/25000 [09:05<01:36, 29.09it/s]

Ep 22200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 89%|████████▉ | 22304/25000 [09:08<01:19, 33.87it/s]

Ep 22300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 90%|████████▉ | 22404/25000 [09:11<01:17, 33.37it/s]

Ep 22400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 90%|█████████ | 22508/25000 [09:13<00:56, 43.80it/s]

Ep 22500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 90%|█████████ | 22608/25000 [09:15<00:54, 43.70it/s]

Ep 22600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 91%|█████████ | 22708/25000 [09:17<00:51, 44.60it/s]

Ep 22700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 4


 91%|█████████ | 22808/25000 [09:20<00:49, 44.05it/s]

Ep 22800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 92%|█████████▏| 22904/25000 [09:22<00:59, 35.10it/s]

Ep 22900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 0


 92%|█████████▏| 23006/25000 [09:25<01:00, 32.81it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 92%|█████████▏| 23106/25000 [09:28<00:42, 44.65it/s]

Ep 23100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 93%|█████████▎| 23206/25000 [09:30<00:41, 43.42it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.6, First Action 0


 93%|█████████▎| 23306/25000 [09:32<00:38, 44.15it/s]

Ep 23300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 94%|█████████▎| 23406/25000 [09:34<00:35, 45.16it/s]

Ep 23400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 5


 94%|█████████▍| 23504/25000 [09:37<00:42, 35.53it/s]

Ep 23500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 94%|█████████▍| 23605/25000 [09:40<00:42, 32.82it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 95%|█████████▍| 23708/25000 [09:42<00:29, 43.73it/s]

Ep 23700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 95%|█████████▌| 23808/25000 [09:44<00:26, 45.00it/s]

Ep 23800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 96%|█████████▌| 23908/25000 [09:47<00:25, 43.23it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 96%|█████████▌| 24008/25000 [09:49<00:22, 44.05it/s]

Ep 24000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 4


 96%|█████████▋| 24103/25000 [09:51<00:24, 36.38it/s]

Ep 24100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 97%|█████████▋| 24203/25000 [09:54<00:23, 33.91it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 97%|█████████▋| 24305/25000 [09:57<00:16, 42.40it/s]

Ep 24300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 4


 98%|█████████▊| 24405/25000 [09:59<00:13, 44.25it/s]

Ep 24400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 98%|█████████▊| 24505/25000 [10:01<00:11, 44.57it/s]

Ep 24500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 9


 98%|█████████▊| 24605/25000 [10:04<00:09, 43.75it/s]

Ep 24600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 0


 99%|█████████▉| 24705/25000 [10:06<00:08, 36.61it/s]

Ep 24700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 99%|█████████▉| 24805/25000 [10:09<00:05, 35.25it/s]

Ep 24800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


100%|█████████▉| 24904/25000 [10:12<00:02, 44.24it/s]

Ep 24900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


100%|██████████| 25000/25000 [10:14<00:00, 40.70it/s]


Ep 25000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4

TEST:


 38%|███▊      | 114/300 [00:00<00:01, 138.61it/s]

Ep 100/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 76%|███████▌  | 228/300 [00:01<00:00, 138.21it/s]

Ep 200/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 4


100%|██████████| 300/300 [00:02<00:00, 138.02it/s]


Ep 300/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


## **Resultados**

In [48]:
# Aggregate the distribution of the first action taken per episode across
# every saved test-stats file.

df_resultados = pd.DataFrame()
df_resultados['Action'] = range(0,11)

for file in os.listdir(google_drive_folder + 'test_stats'):

  # Only stats files whose name starts with 'LR' are aggregated here.
  if file.startswith('LR'):

    df = pd.read_pickle(google_drive_folder + 'test_stats/' + file)
    # Number of episodes in which each action was the first one taken.
    df = df['First Action'].value_counts().sort_index()
    # One column per run, named after the file (extension stripped).
    file_column_name = file.split('.')[0]
    df = df.rename(file_column_name)
    # Left merge keeps all 11 actions; actions never chosen first become NaN.
    df_resultados = df_resultados.merge(df, left_on = 'Action', right_index=True, how = 'left')

# Average the per-run counts only. The 'Action' key column must be dropped
# first, otherwise the action ids (0..10) are averaged in with the counts and
# bias the distribution (e.g. a never-chosen action gets a non-zero share).
df_resultados = df_resultados.drop(columns='Action').fillna(0).mean(axis=1)
# Share (%) of episodes in which each action was the first one taken.
(df_resultados / df_resultados.sum()*100).round(1)

0      7.3
1      2.9
2      9.3
3     10.5
4     24.3
5     11.5
6      2.9
7     21.8
8      6.4
9      2.7
10     0.3
dtype: float64

In [49]:
# Aggregate the distribution of the first action taken per episode across
# every saved train-stats file.

df_resultados = pd.DataFrame()
df_resultados['Action'] = range(0,11)

for file in os.listdir(google_drive_folder + 'train_stats'):

  # Only stats files whose name starts with 'LR' are aggregated here.
  if file.startswith('LR'):

    df = pd.read_pickle(google_drive_folder + 'train_stats/' + file)
    # Number of episodes in which each action was the first one taken.
    df = df['First Action'].value_counts().sort_index()
    # One column per run, named after the file (extension stripped).
    file_column_name = file.split('.')[0]
    df = df.rename(file_column_name)
    # Left merge keeps all 11 actions; actions never chosen first become NaN.
    df_resultados = df_resultados.merge(df, left_on = 'Action', right_index=True, how = 'left')

# Average the per-run counts only. The 'Action' key column must be dropped
# first, otherwise the action ids (0..10) are averaged in with the counts and
# bias the distribution (e.g. a never-chosen action gets a non-zero share).
df_resultados = df_resultados.drop(columns='Action').fillna(0).mean(axis=1)
# Share (%) of episodes in which each action was the first one taken.
(df_resultados / df_resultados.sum()*100).round(1)

0      6.6
1      5.0
2     13.8
3     11.5
4     18.5
5      9.1
6      4.0
7     20.0
8      6.3
9      4.6
10     0.6
dtype: float64

## **Trying different seeds**

In [None]:
# Repeat the full hyperparameter sweep once per random seed to check how
# sensitive the results are to initialization.
SEEDS = [303, 55, 7, 101, 455]

# Set Google Drive Folder
google_drive_folder = '/content/drive/MyDrive/DataMining/Aprendizaje Reforzado/Monografia/Exp3/'
os.makedirs(google_drive_folder + 'models', exist_ok=True)
os.makedirs(google_drive_folder + 'train_stats', exist_ok=True)
os.makedirs(google_drive_folder + 'test_stats', exist_ok=True)

# Hyperparameters
GAMMA_LIST = [0.8, 0.9, 1]
LR_LIST = [0.0001, 0.001, 0.01, 0.1]
ENTROPY_DECAY = [False, True]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

hidden_size = 48
max_steps = 5
n_episodes_train = 25000
n_episodes_test= 300


# Every (gamma, learning rate, entropy decay) combination, visited in random order.
hyperparameters = list(itertools.product(GAMMA_LIST, LR_LIST, ENTROPY_DECAY))
random.shuffle(hyperparameters)

for seed in SEEDS:
    print(f"SEED: {seed}")

    for gamma, learning_rate, entropy_decay in hyperparameters:

        # Re-seed every RNG at the start of each configuration. Seeding only
        # once per seed would make each run depend on how much randomness the
        # previous (randomly ordered) configurations consumed, so a model
        # saved as SEED_<n> could not be reproduced from its seed alone.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        print(f'\nGAMMA {gamma} - LR {learning_rate} - Entropy Decay {entropy_decay}')

        # Environment and agent setup
        env = BanditEnvironment(max_steps = max_steps)
        num_actions = env.num_actions
        input_dim = 1 + num_actions + 1  # Timestep, past action oh, past reward
        agent = A2C_LSTM_Agent(input_dim, hidden_size, num_actions).to(device)
        optimizer = optim.Adam(agent.parameters(), lr=learning_rate)

        # Train
        stats = train_agent(TRAIN = True, n_episodes = n_episodes_train, gamma = gamma, learning_rate = learning_rate, entropy_decay = entropy_decay)

        # Save Model and Train Stats
        agent.eval()
        model_name = f'SEED_{str(seed)}__LR_{str(learning_rate).replace(".","_")}__GAMMA_{str(gamma).replace(".","_")}__EntropyDecay_{str(entropy_decay)}'
        torch.save(agent.state_dict(), google_drive_folder + 'models/'+ model_name + '.pth')
        save_train_stats(stats, model_name, gamma, learning_rate, entropy_decay)

        print('\nTEST:')
        # Test and save stats
        stats = train_agent(TRAIN = False, n_episodes = n_episodes_test, entropy_decay = entropy_decay)
        save_test_stats(stats, model_name, gamma, learning_rate, entropy_decay)



SEED: 303

GAMMA 1 - LR 0.01 - Entropy Decay True


  0%|          | 108/25000 [00:03<08:18, 49.97it/s]

Ep 100/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.9, First Action 0


  1%|          | 208/25000 [00:05<08:21, 49.42it/s]

Ep 200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  1%|          | 306/25000 [00:08<10:38, 38.70it/s]

Ep 300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


  2%|▏         | 407/25000 [00:10<08:24, 48.74it/s]

Ep 400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  2%|▏         | 508/25000 [00:12<08:10, 49.90it/s]

Ep 500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


  2%|▏         | 609/25000 [00:14<08:15, 49.20it/s]

Ep 600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  3%|▎         | 707/25000 [00:16<08:29, 47.70it/s]

Ep 700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


  3%|▎         | 807/25000 [00:18<08:18, 48.53it/s]

Ep 800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  4%|▎         | 905/25000 [00:21<10:09, 39.50it/s]

Ep 900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  4%|▍         | 1006/25000 [00:23<08:37, 46.40it/s]

Ep 1000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


  4%|▍         | 1106/25000 [00:25<07:58, 49.98it/s]

Ep 1100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


  5%|▍         | 1208/25000 [00:27<08:20, 47.54it/s]

Ep 1200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


  5%|▌         | 1306/25000 [00:29<08:24, 46.95it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


  6%|▌         | 1408/25000 [00:31<08:01, 49.03it/s]

Ep 1400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 0


  6%|▌         | 1506/25000 [00:34<09:48, 39.92it/s]

Ep 1500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  6%|▋         | 1609/25000 [00:36<08:33, 45.59it/s]

Ep 1600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  7%|▋         | 1709/25000 [00:38<07:58, 48.62it/s]

Ep 1700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  7%|▋         | 1807/25000 [00:40<07:50, 49.30it/s]

Ep 1800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  8%|▊         | 1908/25000 [00:43<07:54, 48.65it/s]

Ep 1900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


  8%|▊         | 2007/25000 [00:45<07:53, 48.55it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  8%|▊         | 2105/25000 [00:47<09:45, 39.13it/s]

Ep 2100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  9%|▉         | 2207/25000 [00:50<09:25, 40.32it/s]

Ep 2200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  9%|▉         | 2309/25000 [00:52<07:36, 49.65it/s]

Ep 2300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 10%|▉         | 2409/25000 [00:54<07:36, 49.48it/s]

Ep 2400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 10%|█         | 2507/25000 [00:56<07:34, 49.52it/s]

Ep 2500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 10%|█         | 2604/25000 [00:58<11:31, 32.40it/s]

Ep 2600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 0


 11%|█         | 2705/25000 [01:01<09:38, 38.52it/s]

Ep 2700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 0


 11%|█         | 2808/25000 [01:04<08:09, 45.38it/s]

Ep 2800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 12%|█▏        | 2905/25000 [01:06<07:33, 48.73it/s]

Ep 2900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 12%|█▏        | 3007/25000 [01:08<07:36, 48.15it/s]

Ep 3000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 12%|█▏        | 3106/25000 [01:10<07:39, 47.65it/s]

Ep 3100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 0


 13%|█▎        | 3209/25000 [01:12<07:37, 47.67it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 13%|█▎        | 3303/25000 [01:14<09:23, 38.52it/s]

Ep 3300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 14%|█▎        | 3410/25000 [01:17<08:07, 44.25it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 14%|█▍        | 3508/25000 [01:19<07:14, 49.44it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 14%|█▍        | 3609/25000 [01:21<07:13, 49.36it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 15%|█▍        | 3706/25000 [01:23<07:18, 48.54it/s]

Ep 3700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 15%|█▌        | 3808/25000 [01:25<07:09, 49.39it/s]

Ep 3800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 16%|█▌        | 3906/25000 [01:27<08:50, 39.78it/s]

Ep 3900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 16%|█▌        | 4004/25000 [01:30<10:07, 34.56it/s]

Ep 4000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 16%|█▋        | 4105/25000 [01:32<07:04, 49.21it/s]

Ep 4100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 17%|█▋        | 4208/25000 [01:34<06:59, 49.53it/s]

Ep 4200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 17%|█▋        | 4307/25000 [01:36<07:05, 48.67it/s]

Ep 4300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 18%|█▊        | 4410/25000 [01:38<06:52, 49.93it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 18%|█▊        | 4505/25000 [01:40<08:27, 40.39it/s]

Ep 4500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 18%|█▊        | 4605/25000 [01:43<09:28, 35.86it/s]

Ep 4600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 19%|█▉        | 4709/25000 [01:45<06:59, 48.39it/s]

Ep 4700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 19%|█▉        | 4809/25000 [01:47<07:00, 48.01it/s]

Ep 4800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 20%|█▉        | 4909/25000 [01:49<07:05, 47.26it/s]

Ep 4900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 20%|██        | 5008/25000 [01:51<06:53, 48.29it/s]

Ep 5000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 20%|██        | 5106/25000 [01:53<07:01, 47.21it/s]

Ep 5100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 21%|██        | 5206/25000 [01:56<09:06, 36.24it/s]

Ep 5200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 0


 21%|██        | 5306/25000 [01:58<06:37, 49.53it/s]

Ep 5300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 22%|██▏       | 5405/25000 [02:00<06:40, 48.92it/s]

Ep 5400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 22%|██▏       | 5508/25000 [02:02<06:34, 49.36it/s]

Ep 5500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 22%|██▏       | 5604/25000 [02:04<06:44, 47.94it/s]

Ep 5600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 23%|██▎       | 5705/25000 [02:06<06:33, 49.05it/s]

Ep 5700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 23%|██▎       | 5805/25000 [02:09<08:26, 37.89it/s]

Ep 5800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 24%|██▎       | 5907/25000 [02:11<06:39, 47.78it/s]

Ep 5900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 24%|██▍       | 6009/25000 [02:13<06:26, 49.15it/s]

Ep 6000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 24%|██▍       | 6107/25000 [02:15<06:35, 47.77it/s]

Ep 6100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 25%|██▍       | 6206/25000 [02:18<06:23, 49.05it/s]

Ep 6200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 25%|██▌       | 6305/25000 [02:20<06:11, 50.27it/s]

Ep 6300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 26%|██▌       | 6407/25000 [02:22<07:52, 39.38it/s]

Ep 6400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 26%|██▌       | 6507/25000 [02:24<06:26, 47.83it/s]

Ep 6500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 26%|██▋       | 6608/25000 [02:27<06:13, 49.18it/s]

Ep 6600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 27%|██▋       | 6705/25000 [02:29<06:19, 48.24it/s]

Ep 6700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 27%|██▋       | 6805/25000 [02:31<06:17, 48.15it/s]

Ep 6800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 28%|██▊       | 6909/25000 [02:33<06:11, 48.68it/s]

Ep 6900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 28%|██▊       | 7006/25000 [02:35<07:38, 39.27it/s]

Ep 7000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 28%|██▊       | 7109/25000 [02:38<06:12, 48.05it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 0


 29%|██▉       | 7209/25000 [02:40<06:08, 48.24it/s]

Ep 7200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 29%|██▉       | 7309/25000 [02:42<06:06, 48.30it/s]

Ep 7300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 30%|██▉       | 7406/25000 [02:44<06:10, 47.51it/s]

Ep 7400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 30%|███       | 7508/25000 [02:46<06:10, 47.16it/s]

Ep 7500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 30%|███       | 7604/25000 [02:48<07:38, 37.96it/s]

Ep 7600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 31%|███       | 7708/25000 [02:51<06:10, 46.67it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 31%|███       | 7805/25000 [02:53<05:43, 50.02it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


 32%|███▏      | 7909/25000 [02:55<05:44, 49.59it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 32%|███▏      | 8009/25000 [02:57<05:45, 49.12it/s]

Ep 8000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 32%|███▏      | 8105/25000 [02:59<05:44, 49.10it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 33%|███▎      | 8204/25000 [03:01<07:19, 38.19it/s]

Ep 8200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 33%|███▎      | 8305/25000 [03:04<06:27, 43.05it/s]

Ep 8300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 34%|███▎      | 8409/25000 [03:06<05:38, 49.05it/s]

Ep 8400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 34%|███▍      | 8508/25000 [03:08<05:46, 47.57it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 34%|███▍      | 8610/25000 [03:10<05:28, 49.91it/s]

Ep 8600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 35%|███▍      | 8706/25000 [03:12<05:42, 47.56it/s]

Ep 8700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 35%|███▌      | 8807/25000 [03:15<07:05, 38.08it/s]

Ep 8800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 36%|███▌      | 8905/25000 [03:17<07:56, 33.81it/s]

Ep 8900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 36%|███▌      | 9005/25000 [03:19<05:30, 48.45it/s]

Ep 9000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 36%|███▋      | 9107/25000 [03:21<05:32, 47.79it/s]

Ep 9100/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 0


 37%|███▋      | 9205/25000 [03:23<05:32, 47.55it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 37%|███▋      | 9305/25000 [03:25<05:29, 47.59it/s]

Ep 9300/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 0


 38%|███▊      | 9406/25000 [03:27<06:22, 40.79it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 0


 38%|███▊      | 9506/25000 [03:30<07:49, 32.99it/s]

Ep 9500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 0


 38%|███▊      | 9605/25000 [03:32<05:18, 48.38it/s]

Ep 9600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 0


 39%|███▉      | 9708/25000 [03:34<05:10, 49.27it/s]

Ep 9700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 39%|███▉      | 9806/25000 [03:36<05:08, 49.31it/s]

Ep 9800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 40%|███▉      | 9908/25000 [03:39<05:11, 48.38it/s]

Ep 9900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 40%|████      | 10005/25000 [03:41<05:06, 48.93it/s]

Ep 10000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 40%|████      | 10103/25000 [03:43<07:06, 34.94it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 41%|████      | 10205/25000 [03:46<05:11, 47.53it/s]

Ep 10200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 41%|████      | 10306/25000 [03:48<05:05, 48.05it/s]

Ep 10300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 42%|████▏     | 10408/25000 [03:50<05:01, 48.37it/s]

Ep 10400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 0


 42%|████▏     | 10507/25000 [03:52<05:04, 47.55it/s]

Ep 10500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 42%|████▏     | 10608/25000 [03:54<04:51, 49.40it/s]

Ep 10600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 43%|████▎     | 10705/25000 [03:57<06:41, 35.64it/s]

Ep 10700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 43%|████▎     | 10805/25000 [03:59<04:45, 49.79it/s]

Ep 10800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 44%|████▎     | 10909/25000 [04:01<04:42, 49.89it/s]

Ep 10900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 44%|████▍     | 11006/25000 [04:03<04:46, 48.76it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 44%|████▍     | 11105/25000 [04:05<04:39, 49.68it/s]

Ep 11100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 45%|████▍     | 11205/25000 [04:07<04:38, 49.51it/s]

Ep 11200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 45%|████▌     | 11306/25000 [04:10<05:55, 38.53it/s]

Ep 11300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 46%|████▌     | 11407/25000 [04:12<04:37, 49.00it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 46%|████▌     | 11505/25000 [04:14<04:35, 48.91it/s]

Ep 11500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 46%|████▋     | 11609/25000 [04:16<04:39, 47.87it/s]

Ep 11600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 47%|████▋     | 11709/25000 [04:18<04:37, 47.92it/s]

Ep 11700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 47%|████▋     | 11807/25000 [04:20<04:35, 47.90it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 48%|████▊     | 11904/25000 [04:23<05:42, 38.25it/s]

Ep 11900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 48%|████▊     | 12004/25000 [04:25<04:45, 45.58it/s]

Ep 12000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 48%|████▊     | 12108/25000 [04:27<04:34, 46.97it/s]

Ep 12100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 49%|████▉     | 12204/25000 [04:29<04:36, 46.33it/s]

Ep 12200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 49%|████▉     | 12307/25000 [04:32<04:28, 47.24it/s]

Ep 12300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 50%|████▉     | 12409/25000 [04:34<04:22, 47.98it/s]

Ep 12400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 0


 50%|█████     | 12506/25000 [04:36<05:16, 39.50it/s]

Ep 12500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 50%|█████     | 12606/25000 [04:38<04:34, 45.08it/s]

Ep 12600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 51%|█████     | 12709/25000 [04:41<04:11, 48.92it/s]

Ep 12700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 51%|█████     | 12807/25000 [04:43<04:05, 49.66it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 52%|█████▏    | 12907/25000 [04:45<04:08, 48.61it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 52%|█████▏    | 13006/25000 [04:47<04:03, 49.20it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 52%|█████▏    | 13106/25000 [04:49<05:09, 38.41it/s]

Ep 13100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


 53%|█████▎    | 13207/25000 [04:52<04:36, 42.58it/s]

Ep 13200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 53%|█████▎    | 13310/25000 [04:54<03:57, 49.27it/s]

Ep 13300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 54%|█████▎    | 13407/25000 [04:56<03:53, 49.56it/s]

Ep 13400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 54%|█████▍    | 13507/25000 [04:58<03:53, 49.32it/s]

Ep 13500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 54%|█████▍    | 13607/25000 [05:00<03:53, 48.88it/s]

Ep 13600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 55%|█████▍    | 13704/25000 [05:02<04:46, 39.37it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 55%|█████▌    | 13808/25000 [05:05<04:51, 38.33it/s]

Ep 13800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 56%|█████▌    | 13910/25000 [05:07<03:43, 49.57it/s]

Ep 13900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 56%|█████▌    | 14009/25000 [05:09<03:43, 49.15it/s]

Ep 14000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 56%|█████▋    | 14108/25000 [05:11<03:42, 49.03it/s]

Ep 14100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 57%|█████▋    | 14208/25000 [05:13<03:40, 48.86it/s]

Ep 14200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 57%|█████▋    | 14305/25000 [05:15<04:39, 38.33it/s]

Ep 14300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 58%|█████▊    | 14403/25000 [05:18<05:04, 34.83it/s]

Ep 14400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 58%|█████▊    | 14506/25000 [05:20<03:35, 48.66it/s]

Ep 14500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 58%|█████▊    | 14605/25000 [05:22<03:37, 47.73it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 59%|█████▉    | 14705/25000 [05:24<03:40, 46.73it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 59%|█████▉    | 14806/25000 [05:26<03:33, 47.78it/s]

Ep 14800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 60%|█████▉    | 14904/25000 [05:28<03:52, 43.38it/s]

Ep 14900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 60%|██████    | 15005/25000 [05:31<04:38, 35.94it/s]

Ep 15000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 60%|██████    | 15104/25000 [05:33<03:26, 47.88it/s]

Ep 15100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 61%|██████    | 15207/25000 [05:35<03:24, 47.88it/s]

Ep 15200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 61%|██████    | 15309/25000 [05:37<03:17, 49.13it/s]

Ep 15300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


 62%|██████▏   | 15408/25000 [05:39<03:14, 49.24it/s]

Ep 15400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 62%|██████▏   | 15504/25000 [05:41<03:17, 48.16it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 62%|██████▏   | 15604/25000 [05:44<04:23, 35.62it/s]

Ep 15600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 63%|██████▎   | 15707/25000 [05:47<03:11, 48.64it/s]

Ep 15700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 63%|██████▎   | 15805/25000 [05:49<03:08, 48.75it/s]

Ep 15800/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 0


 64%|██████▎   | 15907/25000 [05:51<03:08, 48.33it/s]

Ep 15900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 64%|██████▍   | 16006/25000 [05:53<03:02, 49.16it/s]

Ep 16000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 64%|██████▍   | 16105/25000 [05:55<03:02, 48.78it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 0


 65%|██████▍   | 16204/25000 [05:57<04:08, 35.39it/s]

Ep 16200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 65%|██████▌   | 16306/25000 [06:00<02:55, 49.48it/s]

Ep 16300/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 0


 66%|██████▌   | 16406/25000 [06:02<02:57, 48.45it/s]

Ep 16400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 66%|██████▌   | 16510/25000 [06:04<02:52, 49.35it/s]

Ep 16500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 66%|██████▋   | 16607/25000 [06:06<02:51, 48.95it/s]

Ep 16600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 67%|██████▋   | 16706/25000 [06:08<02:48, 49.11it/s]

Ep 16700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 67%|██████▋   | 16805/25000 [06:11<03:41, 37.02it/s]

Ep 16800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 68%|██████▊   | 16905/25000 [06:13<02:47, 48.29it/s]

Ep 16900/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 0


 68%|██████▊   | 17006/25000 [06:15<02:52, 46.35it/s]

Ep 17000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 68%|██████▊   | 17108/25000 [06:17<02:44, 48.11it/s]

Ep 17100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 69%|██████▉   | 17205/25000 [06:19<02:42, 47.95it/s]

Ep 17200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 69%|██████▉   | 17306/25000 [06:21<02:38, 48.68it/s]

Ep 17300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 70%|██████▉   | 17406/25000 [06:24<03:13, 39.29it/s]

Ep 17400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 70%|███████   | 17504/25000 [06:26<02:41, 46.45it/s]

Ep 17500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 0


 70%|███████   | 17607/25000 [06:29<02:34, 47.89it/s]

Ep 17600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 71%|███████   | 17705/25000 [06:31<02:31, 48.23it/s]

Ep 17700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 71%|███████   | 17809/25000 [06:33<02:28, 48.56it/s]

Ep 17800/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 0


 72%|███████▏  | 17905/25000 [06:35<02:27, 48.09it/s]

Ep 17900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 0


 72%|███████▏  | 18005/25000 [06:37<03:12, 36.29it/s]

Ep 18000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 72%|███████▏  | 18108/25000 [06:40<02:24, 47.84it/s]

Ep 18100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 73%|███████▎  | 18210/25000 [06:42<02:19, 48.76it/s]

Ep 18200/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 0


 73%|███████▎  | 18309/25000 [06:44<02:14, 49.60it/s]

Ep 18300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 74%|███████▎  | 18406/25000 [06:46<02:17, 47.99it/s]

Ep 18400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 74%|███████▍  | 18507/25000 [06:48<02:15, 47.88it/s]

Ep 18500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 74%|███████▍  | 18606/25000 [06:51<02:51, 37.18it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 0


 75%|███████▍  | 18708/25000 [06:53<02:16, 45.98it/s]

Ep 18700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 75%|███████▌  | 18807/25000 [06:55<02:10, 47.37it/s]

Ep 18800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 76%|███████▌  | 18905/25000 [06:57<02:08, 47.48it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 76%|███████▌  | 19006/25000 [06:59<02:07, 47.14it/s]

Ep 19000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 76%|███████▋  | 19109/25000 [07:01<02:04, 47.47it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 77%|███████▋  | 19205/25000 [07:04<02:34, 37.51it/s]

Ep 19200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 77%|███████▋  | 19305/25000 [07:06<02:00, 47.29it/s]

Ep 19300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 78%|███████▊  | 19405/25000 [07:08<01:53, 49.44it/s]

Ep 19400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 78%|███████▊  | 19505/25000 [07:11<01:51, 49.37it/s]

Ep 19500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 78%|███████▊  | 19606/25000 [07:13<01:49, 49.07it/s]

Ep 19600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 79%|███████▉  | 19706/25000 [07:15<01:51, 47.53it/s]

Ep 19700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 79%|███████▉  | 19804/25000 [07:17<02:14, 38.64it/s]

Ep 19800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 80%|███████▉  | 19908/25000 [07:20<01:45, 48.13it/s]

Ep 19900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 80%|████████  | 20005/25000 [07:22<01:42, 48.95it/s]

Ep 20000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 80%|████████  | 20105/25000 [07:24<01:40, 48.84it/s]

Ep 20100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 81%|████████  | 20206/25000 [07:26<01:37, 49.05it/s]

Ep 20200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 81%|████████  | 20308/25000 [07:28<01:36, 48.80it/s]

Ep 20300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 82%|████████▏ | 20405/25000 [07:30<01:58, 38.64it/s]

Ep 20400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 82%|████████▏ | 20507/25000 [07:33<01:37, 46.09it/s]

Ep 20500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 82%|████████▏ | 20609/25000 [07:35<01:31, 47.98it/s]

Ep 20600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 83%|████████▎ | 20709/25000 [07:37<01:29, 47.88it/s]

Ep 20700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 83%|████████▎ | 20806/25000 [07:39<01:28, 47.63it/s]

Ep 20800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 84%|████████▎ | 20907/25000 [07:41<01:25, 47.87it/s]

Ep 20900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 84%|████████▍ | 21007/25000 [07:44<01:43, 38.67it/s]

Ep 21000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 84%|████████▍ | 21107/25000 [07:46<01:29, 43.45it/s]

Ep 21100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 85%|████████▍ | 21208/25000 [07:49<01:19, 47.78it/s]

Ep 21200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 85%|████████▌ | 21305/25000 [07:51<01:17, 47.78it/s]

Ep 21300/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 0


 86%|████████▌ | 21408/25000 [07:53<01:15, 47.77it/s]

Ep 21400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 86%|████████▌ | 21508/25000 [07:55<01:11, 49.17it/s]

Ep 21500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 86%|████████▋ | 21604/25000 [07:57<01:26, 39.13it/s]

Ep 21600/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 0


 87%|████████▋ | 21707/25000 [08:00<01:22, 39.99it/s]

Ep 21700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 87%|████████▋ | 21808/25000 [08:02<01:08, 46.46it/s]

Ep 21800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 0


 88%|████████▊ | 21904/25000 [08:04<01:09, 44.35it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 88%|████████▊ | 22005/25000 [08:06<01:04, 46.79it/s]

Ep 22000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 88%|████████▊ | 22105/25000 [08:08<01:01, 47.30it/s]

Ep 22100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 89%|████████▉ | 22205/25000 [08:11<01:12, 38.40it/s]

Ep 22200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 89%|████████▉ | 22305/25000 [08:13<01:08, 39.07it/s]

Ep 22300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 90%|████████▉ | 22405/25000 [08:16<00:55, 46.34it/s]

Ep 22400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 90%|█████████ | 22506/25000 [08:18<00:53, 46.45it/s]

Ep 22500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 90%|█████████ | 22608/25000 [08:20<00:48, 49.36it/s]

Ep 22600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 91%|█████████ | 22708/25000 [08:22<00:47, 48.12it/s]

Ep 22700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 91%|█████████ | 22807/25000 [08:24<00:59, 36.68it/s]

Ep 22800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 92%|█████████▏| 22908/25000 [08:27<00:46, 45.36it/s]

Ep 22900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 92%|█████████▏| 23008/25000 [08:29<00:41, 48.57it/s]

Ep 23000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 92%|█████████▏| 23107/25000 [08:31<00:38, 48.99it/s]

Ep 23100/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 0


 93%|█████████▎| 23205/25000 [08:33<00:38, 47.15it/s]

Ep 23200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 93%|█████████▎| 23307/25000 [08:35<00:36, 46.27it/s]

Ep 23300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 94%|█████████▎| 23404/25000 [08:38<00:44, 35.77it/s]

Ep 23400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 94%|█████████▍| 23505/25000 [08:41<00:33, 44.93it/s]

Ep 23500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 94%|█████████▍| 23605/25000 [08:43<00:28, 48.20it/s]

Ep 23600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 95%|█████████▍| 23706/25000 [08:45<00:27, 47.89it/s]

Ep 23700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0


 95%|█████████▌| 23806/25000 [08:47<00:25, 47.38it/s]

Ep 23800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 96%|█████████▌| 23908/25000 [08:49<00:23, 46.18it/s]

Ep 23900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 96%|█████████▌| 24007/25000 [08:52<00:26, 37.87it/s]

Ep 24000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


 96%|█████████▋| 24105/25000 [08:54<00:20, 43.97it/s]

Ep 24100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 0


 97%|█████████▋| 24205/25000 [08:56<00:17, 46.32it/s]

Ep 24200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 97%|█████████▋| 24305/25000 [08:59<00:14, 46.95it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 98%|█████████▊| 24405/25000 [09:01<00:12, 47.56it/s]

Ep 24400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 98%|█████████▊| 24505/25000 [09:03<00:10, 47.14it/s]

Ep 24500/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 0


 98%|█████████▊| 24606/25000 [09:05<00:10, 37.17it/s]

Ep 24600/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 0


 99%|█████████▉| 24707/25000 [09:08<00:06, 47.58it/s]

Ep 24700/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 0


 99%|█████████▉| 24809/25000 [09:10<00:03, 49.01it/s]

Ep 24800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


100%|█████████▉| 24907/25000 [09:12<00:01, 48.16it/s]

Ep 24900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 0


100%|██████████| 25000/25000 [09:14<00:00, 45.07it/s]


Ep 25000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0

TEST:


 43%|████▎     | 128/300 [00:00<00:01, 148.90it/s]

Ep 100/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 75%|███████▍  | 224/300 [00:01<00:00, 152.98it/s]

Ep 200/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 0


100%|██████████| 300/300 [00:01<00:00, 151.03it/s]


Ep 300/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 0

GAMMA 1 - LR 0.1 - Entropy Decay False


  0%|          | 107/25000 [00:02<11:06, 37.33it/s]

Ep 100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


  1%|          | 209/25000 [00:05<08:35, 48.10it/s]

Ep 200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  1%|          | 305/25000 [00:07<08:29, 48.50it/s]

Ep 300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  2%|▏         | 407/25000 [00:09<08:25, 48.65it/s]

Ep 400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  2%|▏         | 508/25000 [00:11<08:30, 47.94it/s]

Ep 500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  2%|▏         | 609/25000 [00:13<08:34, 47.43it/s]

Ep 600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  3%|▎         | 705/25000 [00:16<11:04, 36.54it/s]

Ep 700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  3%|▎         | 806/25000 [00:18<08:39, 46.56it/s]

Ep 800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


  4%|▎         | 906/25000 [00:20<08:16, 48.58it/s]

Ep 900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  4%|▍         | 1007/25000 [00:22<08:29, 47.07it/s]

Ep 1000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  4%|▍         | 1107/25000 [00:25<08:25, 47.27it/s]

Ep 1100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  5%|▍         | 1207/25000 [00:27<08:16, 47.94it/s]

Ep 1200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  5%|▌         | 1307/25000 [00:29<10:24, 37.95it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  6%|▌         | 1406/25000 [00:32<08:29, 46.27it/s]

Ep 1400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  6%|▌         | 1508/25000 [00:34<08:04, 48.53it/s]

Ep 1500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  6%|▋         | 1606/25000 [00:36<08:01, 48.61it/s]

Ep 1600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 2


  7%|▋         | 1707/25000 [00:38<07:55, 48.99it/s]

Ep 1700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  7%|▋         | 1808/25000 [00:40<07:53, 48.93it/s]

Ep 1800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  8%|▊         | 1905/25000 [00:42<10:12, 37.72it/s]

Ep 1900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  8%|▊         | 2007/25000 [00:45<08:25, 45.49it/s]

Ep 2000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 2


  8%|▊         | 2108/25000 [00:47<07:52, 48.45it/s]

Ep 2100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


  9%|▉         | 2208/25000 [00:49<07:51, 48.31it/s]

Ep 2200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


  9%|▉         | 2305/25000 [00:51<07:56, 47.68it/s]

Ep 2300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 10%|▉         | 2406/25000 [00:53<08:03, 46.70it/s]

Ep 2400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 10%|█         | 2505/25000 [00:56<09:59, 37.50it/s]

Ep 2500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 10%|█         | 2606/25000 [00:59<08:31, 43.80it/s]

Ep 2600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 11%|█         | 2708/25000 [01:01<07:44, 48.02it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 11%|█         | 2809/25000 [01:03<07:45, 47.70it/s]

Ep 2800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 12%|█▏        | 2910/25000 [01:05<07:25, 49.53it/s]

Ep 2900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 12%|█▏        | 3006/25000 [01:07<07:37, 48.10it/s]

Ep 3000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 12%|█▏        | 3106/25000 [01:09<09:15, 39.38it/s]

Ep 3100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 13%|█▎        | 3208/25000 [01:12<08:07, 44.67it/s]

Ep 3200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 13%|█▎        | 3308/25000 [01:14<07:35, 47.65it/s]

Ep 3300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 14%|█▎        | 3408/25000 [01:16<07:31, 47.80it/s]

Ep 3400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 14%|█▍        | 3505/25000 [01:18<07:27, 48.01it/s]

Ep 3500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 14%|█▍        | 3606/25000 [01:20<07:36, 46.91it/s]

Ep 3600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 15%|█▍        | 3708/25000 [01:23<09:04, 39.11it/s]

Ep 3700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 15%|█▌        | 3809/25000 [01:26<07:53, 44.78it/s]

Ep 3800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 16%|█▌        | 3908/25000 [01:28<07:22, 47.72it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 16%|█▌        | 4008/25000 [01:30<07:20, 47.70it/s]

Ep 4000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 16%|█▋        | 4109/25000 [01:32<07:10, 48.49it/s]

Ep 4100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 17%|█▋        | 4209/25000 [01:34<07:13, 47.95it/s]

Ep 4200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 17%|█▋        | 4306/25000 [01:36<08:54, 38.75it/s]

Ep 4300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 18%|█▊        | 4406/25000 [01:39<07:56, 43.25it/s]

Ep 4400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 18%|█▊        | 4506/25000 [01:41<07:02, 48.45it/s]

Ep 4500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 18%|█▊        | 4607/25000 [01:43<06:58, 48.76it/s]

Ep 4600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 19%|█▉        | 4709/25000 [01:45<06:55, 48.85it/s]

Ep 4700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 19%|█▉        | 4807/25000 [01:47<06:50, 49.14it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 20%|█▉        | 4906/25000 [01:50<08:42, 38.46it/s]

Ep 4900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 20%|██        | 5008/25000 [01:52<08:33, 38.97it/s]

Ep 5000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 20%|██        | 5108/25000 [01:55<07:00, 47.32it/s]

Ep 5100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 21%|██        | 5208/25000 [01:57<06:51, 48.05it/s]

Ep 5200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 21%|██        | 5306/25000 [01:59<07:06, 46.13it/s]

Ep 5300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 22%|██▏       | 5406/25000 [02:01<06:51, 47.65it/s]

Ep 5400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 22%|██▏       | 5504/25000 [02:03<08:45, 37.11it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 22%|██▏       | 5606/25000 [02:06<08:09, 39.61it/s]

Ep 5600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 23%|██▎       | 5706/25000 [02:08<06:44, 47.65it/s]

Ep 5700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 23%|██▎       | 5808/25000 [02:10<06:32, 48.88it/s]

Ep 5800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 24%|██▎       | 5909/25000 [02:12<06:32, 48.69it/s]

Ep 5900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 24%|██▍       | 6005/25000 [02:14<06:31, 48.47it/s]

Ep 6000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 24%|██▍       | 6107/25000 [02:17<08:30, 37.03it/s]

Ep 6100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 25%|██▍       | 6206/25000 [02:19<07:37, 41.09it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 25%|██▌       | 6309/25000 [02:22<06:27, 48.22it/s]

Ep 6300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 26%|██▌       | 6409/25000 [02:24<06:30, 47.66it/s]

Ep 6400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 26%|██▌       | 6509/25000 [02:26<06:24, 48.11it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 26%|██▋       | 6605/25000 [02:28<06:22, 48.11it/s]

Ep 6600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 27%|██▋       | 6706/25000 [02:30<07:53, 38.65it/s]

Ep 6700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 27%|██▋       | 6806/25000 [02:33<07:15, 41.76it/s]

Ep 6800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 28%|██▊       | 6907/25000 [02:35<06:13, 48.48it/s]

Ep 6900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 28%|██▊       | 7008/25000 [02:37<06:12, 48.33it/s]

Ep 7000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 28%|██▊       | 7108/25000 [02:39<06:08, 48.55it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 29%|██▉       | 7209/25000 [02:41<06:02, 49.06it/s]

Ep 7200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 29%|██▉       | 7305/25000 [02:43<07:51, 37.50it/s]

Ep 7300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 30%|██▉       | 7409/25000 [02:46<06:55, 42.29it/s]

Ep 7400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 30%|███       | 7505/25000 [02:48<06:02, 48.31it/s]

Ep 7500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 30%|███       | 7608/25000 [02:50<06:06, 47.45it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 31%|███       | 7707/25000 [02:53<06:04, 47.43it/s]

Ep 7700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 31%|███       | 7805/25000 [02:55<06:07, 46.77it/s]

Ep 7800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 32%|███▏      | 7906/25000 [02:57<07:43, 36.84it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 32%|███▏      | 8006/25000 [03:00<08:06, 34.90it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 32%|███▏      | 8107/25000 [03:02<05:56, 47.42it/s]

Ep 8100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 33%|███▎      | 8207/25000 [03:04<05:52, 47.69it/s]

Ep 8200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 33%|███▎      | 8309/25000 [03:06<05:47, 48.00it/s]

Ep 8300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 34%|███▎      | 8408/25000 [03:08<05:42, 48.50it/s]

Ep 8400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 34%|███▍      | 8504/25000 [03:10<07:01, 39.11it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 34%|███▍      | 8604/25000 [03:13<07:50, 34.85it/s]

Ep 8600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 35%|███▍      | 8705/25000 [03:15<05:36, 48.48it/s]

Ep 8700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 35%|███▌      | 8806/25000 [03:17<05:33, 48.52it/s]

Ep 8800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 36%|███▌      | 8908/25000 [03:19<05:38, 47.50it/s]

Ep 8900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 2


 36%|███▌      | 9007/25000 [03:21<05:37, 47.40it/s]

Ep 9000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 36%|███▋      | 9104/25000 [03:24<06:45, 39.24it/s]

Ep 9100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 37%|███▋      | 9205/25000 [03:26<07:44, 33.97it/s]

Ep 9200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 37%|███▋      | 9306/25000 [03:29<05:31, 47.40it/s]

Ep 9300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 38%|███▊      | 9406/25000 [03:31<05:26, 47.70it/s]

Ep 9400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 38%|███▊      | 9510/25000 [03:33<05:14, 49.20it/s]

Ep 9500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 38%|███▊      | 9609/25000 [03:35<05:22, 47.71it/s]

Ep 9600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 39%|███▉      | 9704/25000 [03:37<06:18, 40.43it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 39%|███▉      | 9804/25000 [03:40<07:30, 33.73it/s]

Ep 9800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 40%|███▉      | 9906/25000 [03:42<05:14, 47.94it/s]

Ep 9900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 40%|████      | 10006/25000 [03:44<05:12, 48.04it/s]

Ep 10000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 40%|████      | 10108/25000 [03:46<05:08, 48.27it/s]

Ep 10100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 41%|████      | 10208/25000 [03:48<05:07, 48.17it/s]

Ep 10200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 41%|████      | 10305/25000 [03:50<05:54, 41.40it/s]

Ep 10300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 42%|████▏     | 10403/25000 [03:53<06:50, 35.57it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 42%|████▏     | 10508/25000 [03:55<05:14, 46.03it/s]

Ep 10500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 42%|████▏     | 10607/25000 [03:57<05:04, 47.33it/s]

Ep 10600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 43%|████▎     | 10707/25000 [04:00<05:16, 45.12it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 43%|████▎     | 10805/25000 [04:02<04:58, 47.62it/s]

Ep 10800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 44%|████▎     | 10907/25000 [04:04<05:29, 42.79it/s]

Ep 10900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 44%|████▍     | 11003/25000 [04:06<06:43, 34.68it/s]

Ep 11000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 44%|████▍     | 11108/25000 [04:09<05:01, 46.06it/s]

Ep 11100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 45%|████▍     | 11208/25000 [04:11<04:51, 47.32it/s]

Ep 11200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 45%|████▌     | 11308/25000 [04:13<04:48, 47.40it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 46%|████▌     | 11409/25000 [04:15<04:44, 47.79it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 46%|████▌     | 11504/25000 [04:17<04:55, 45.71it/s]

Ep 11500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 46%|████▋     | 11604/25000 [04:20<06:24, 34.84it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 47%|████▋     | 11709/25000 [04:22<04:46, 46.43it/s]

Ep 11700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 47%|████▋     | 11805/25000 [04:24<04:38, 47.37it/s]

Ep 11800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 48%|████▊     | 11905/25000 [04:26<04:38, 47.04it/s]

Ep 11900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 48%|████▊     | 12006/25000 [04:29<04:45, 45.47it/s]

Ep 12000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 48%|████▊     | 12106/25000 [04:31<05:12, 41.28it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 49%|████▉     | 12203/25000 [04:33<06:10, 34.54it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 49%|████▉     | 12308/25000 [04:36<04:30, 46.86it/s]

Ep 12300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 50%|████▉     | 12408/25000 [04:38<04:22, 47.95it/s]

Ep 12400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 50%|█████     | 12508/25000 [04:40<04:21, 47.77it/s]

Ep 12500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 50%|█████     | 12609/25000 [04:42<04:15, 48.50it/s]

Ep 12600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 51%|█████     | 12704/25000 [04:44<04:32, 45.06it/s]

Ep 12700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 51%|█████     | 12804/25000 [04:47<05:55, 34.35it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 52%|█████▏    | 12908/25000 [04:49<04:19, 46.58it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 52%|█████▏    | 13008/25000 [04:51<04:21, 45.86it/s]

Ep 13000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 52%|█████▏    | 13109/25000 [04:54<04:10, 47.55it/s]

Ep 13100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 53%|█████▎    | 13209/25000 [04:56<04:20, 45.18it/s]

Ep 13200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 53%|█████▎    | 13307/25000 [04:58<04:43, 41.24it/s]

Ep 13300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 54%|█████▎    | 13403/25000 [05:00<06:04, 31.79it/s]

Ep 13400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13507/25000 [05:03<04:01, 47.53it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13607/25000 [05:05<04:00, 47.34it/s]

Ep 13600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 55%|█████▍    | 13710/25000 [05:07<03:46, 49.82it/s]

Ep 13700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 55%|█████▌    | 13806/25000 [05:09<04:02, 46.14it/s]

Ep 13800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 56%|█████▌    | 13908/25000 [05:11<03:54, 47.23it/s]

Ep 13900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 56%|█████▌    | 14005/25000 [05:14<05:18, 34.50it/s]

Ep 14000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 56%|█████▋    | 14107/25000 [05:16<03:52, 46.83it/s]

Ep 14100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 57%|█████▋    | 14207/25000 [05:19<03:50, 46.90it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 2


 57%|█████▋    | 14308/25000 [05:21<03:42, 48.05it/s]

Ep 14300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 58%|█████▊    | 14409/25000 [05:23<03:37, 48.66it/s]

Ep 14400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 58%|█████▊    | 14505/25000 [05:25<03:39, 47.87it/s]

Ep 14500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 58%|█████▊    | 14604/25000 [05:27<04:56, 35.03it/s]

Ep 14600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 59%|█████▉    | 14706/25000 [05:30<03:38, 47.14it/s]

Ep 14700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 59%|█████▉    | 14809/25000 [05:32<03:26, 49.43it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 60%|█████▉    | 14909/25000 [05:34<03:27, 48.75it/s]

Ep 14900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 60%|██████    | 15006/25000 [05:36<03:28, 47.99it/s]

Ep 15000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 60%|██████    | 15106/25000 [05:38<03:24, 48.28it/s]

Ep 15100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 61%|██████    | 15204/25000 [05:41<04:25, 36.86it/s]

Ep 15200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 61%|██████    | 15305/25000 [05:43<03:21, 48.21it/s]

Ep 15300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 62%|██████▏   | 15409/25000 [05:45<03:15, 49.03it/s]

Ep 15400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 62%|██████▏   | 15506/25000 [05:47<03:19, 47.49it/s]

Ep 15500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 62%|██████▏   | 15609/25000 [05:49<03:14, 48.24it/s]

Ep 15600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 63%|██████▎   | 15708/25000 [05:52<03:14, 47.75it/s]

Ep 15700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 63%|██████▎   | 15805/25000 [05:54<03:56, 38.80it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 64%|██████▎   | 15908/25000 [05:57<03:12, 47.17it/s]

Ep 15900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 64%|██████▍   | 16007/25000 [05:59<03:07, 47.88it/s]

Ep 16000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 64%|██████▍   | 16106/25000 [06:01<03:06, 47.60it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 65%|██████▍   | 16206/25000 [06:03<03:03, 48.01it/s]

Ep 16200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 65%|██████▌   | 16306/25000 [06:05<03:01, 47.92it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 66%|██████▌   | 16406/25000 [06:07<03:55, 36.48it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 66%|██████▌   | 16507/25000 [06:10<03:01, 46.82it/s]

Ep 16500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 66%|██████▋   | 16607/25000 [06:12<02:56, 47.62it/s]

Ep 16600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 67%|██████▋   | 16707/25000 [06:14<02:55, 47.20it/s]

Ep 16700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 67%|██████▋   | 16807/25000 [06:16<02:52, 47.50it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 68%|██████▊   | 16907/25000 [06:18<02:49, 47.75it/s]

Ep 16900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 68%|██████▊   | 17007/25000 [06:21<03:27, 38.46it/s]

Ep 17000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 68%|██████▊   | 17106/25000 [06:24<02:50, 46.22it/s]

Ep 17100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 69%|██████▉   | 17207/25000 [06:26<02:44, 47.29it/s]

Ep 17200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 69%|██████▉   | 17305/25000 [06:28<02:42, 47.33it/s]

Ep 17300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 70%|██████▉   | 17405/25000 [06:30<02:40, 47.27it/s]

Ep 17400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 70%|███████   | 17507/25000 [06:32<02:35, 48.22it/s]

Ep 17500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 70%|███████   | 17607/25000 [06:34<03:25, 36.03it/s]

Ep 17600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 71%|███████   | 17706/25000 [06:37<02:36, 46.64it/s]

Ep 17700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 71%|███████   | 17807/25000 [06:39<02:29, 47.97it/s]

Ep 17800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 72%|███████▏  | 17907/25000 [06:41<02:25, 48.62it/s]

Ep 17900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 72%|███████▏  | 18007/25000 [06:43<02:25, 48.20it/s]

Ep 18000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 72%|███████▏  | 18107/25000 [06:45<02:25, 47.25it/s]

Ep 18100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 73%|███████▎  | 18205/25000 [06:48<03:03, 37.08it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 73%|███████▎  | 18309/25000 [06:51<02:22, 46.83it/s]

Ep 18300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 74%|███████▎  | 18406/25000 [06:53<02:18, 47.75it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 74%|███████▍  | 18508/25000 [06:55<02:14, 48.17it/s]

Ep 18500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 74%|███████▍  | 18609/25000 [06:57<02:13, 47.79it/s]

Ep 18600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 75%|███████▍  | 18705/25000 [06:59<02:11, 47.97it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 75%|███████▌  | 18804/25000 [07:01<02:41, 38.45it/s]

Ep 18800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 76%|███████▌  | 18909/25000 [07:04<02:16, 44.73it/s]

Ep 18900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 76%|███████▌  | 19009/25000 [07:06<02:03, 48.44it/s]

Ep 19000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 76%|███████▋  | 19109/25000 [07:08<02:02, 48.17it/s]

Ep 19100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 77%|███████▋  | 19206/25000 [07:10<02:00, 48.07it/s]

Ep 19200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 77%|███████▋  | 19306/25000 [07:12<02:02, 46.29it/s]

Ep 19300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 78%|███████▊  | 19404/25000 [07:15<02:26, 38.32it/s]

Ep 19400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 78%|███████▊  | 19509/25000 [07:18<02:07, 42.98it/s]

Ep 19500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 78%|███████▊  | 19609/25000 [07:20<01:54, 47.19it/s]

Ep 19600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 79%|███████▉  | 19709/25000 [07:22<01:51, 47.25it/s]

Ep 19700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 79%|███████▉  | 19805/25000 [07:24<01:48, 47.85it/s]

Ep 19800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 80%|███████▉  | 19905/25000 [07:26<01:46, 47.70it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 80%|████████  | 20004/25000 [07:28<02:06, 39.39it/s]

Ep 20000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 80%|████████  | 20107/25000 [07:31<01:55, 42.41it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 81%|████████  | 20209/25000 [07:33<01:38, 48.57it/s]

Ep 20200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 81%|████████  | 20305/25000 [07:35<01:37, 48.38it/s]

Ep 20300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 82%|████████▏ | 20406/25000 [07:37<01:38, 46.59it/s]

Ep 20400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 82%|████████▏ | 20506/25000 [07:39<01:34, 47.74it/s]

Ep 20500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 82%|████████▏ | 20606/25000 [07:42<01:57, 37.28it/s]

Ep 20600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 83%|████████▎ | 20706/25000 [07:45<01:40, 42.60it/s]

Ep 20700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 83%|████████▎ | 20806/25000 [07:47<01:30, 46.54it/s]

Ep 20800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 84%|████████▎ | 20907/25000 [07:49<01:27, 46.94it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 84%|████████▍ | 21007/25000 [07:51<01:23, 47.85it/s]

Ep 21000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 84%|████████▍ | 21108/25000 [07:53<01:20, 48.21it/s]

Ep 21100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 85%|████████▍ | 21205/25000 [07:55<01:45, 35.80it/s]

Ep 21200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 85%|████████▌ | 21308/25000 [07:58<01:18, 46.82it/s]

Ep 21300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 86%|████████▌ | 21408/25000 [08:00<01:14, 48.52it/s]

Ep 21400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 86%|████████▌ | 21508/25000 [08:02<01:17, 44.87it/s]

Ep 21500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 86%|████████▋ | 21608/25000 [08:05<01:11, 47.60it/s]

Ep 21600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 87%|████████▋ | 21708/25000 [08:07<01:08, 48.17it/s]

Ep 21700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 87%|████████▋ | 21806/25000 [08:09<01:24, 37.70it/s]

Ep 21800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 88%|████████▊ | 21907/25000 [08:12<01:10, 43.70it/s]

Ep 21900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 88%|████████▊ | 22008/25000 [08:14<01:05, 45.52it/s]

Ep 22000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 88%|████████▊ | 22108/25000 [08:16<01:00, 47.49it/s]

Ep 22100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 89%|████████▉ | 22205/25000 [08:18<00:58, 47.43it/s]

Ep 22200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 89%|████████▉ | 22305/25000 [08:20<00:55, 48.17it/s]

Ep 22300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 90%|████████▉ | 22404/25000 [08:22<01:09, 37.17it/s]

Ep 22400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 90%|█████████ | 22509/25000 [08:25<00:56, 43.95it/s]

Ep 22500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 90%|█████████ | 22605/25000 [08:27<00:48, 49.10it/s]

Ep 22600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 91%|█████████ | 22707/25000 [08:29<00:48, 47.51it/s]

Ep 22700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 91%|█████████ | 22807/25000 [08:32<00:46, 47.07it/s]

Ep 22800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 92%|█████████▏| 22907/25000 [08:34<00:44, 46.82it/s]

Ep 22900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 92%|█████████▏| 23007/25000 [08:36<00:51, 38.54it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 92%|█████████▏| 23109/25000 [08:39<00:43, 43.12it/s]

Ep 23100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 93%|█████████▎| 23205/25000 [08:41<00:37, 47.88it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 2


 93%|█████████▎| 23306/25000 [08:43<00:36, 46.91it/s]

Ep 23300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 94%|█████████▎| 23406/25000 [08:45<00:34, 46.73it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 94%|█████████▍| 23507/25000 [08:47<00:31, 47.45it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 94%|█████████▍| 23605/25000 [08:50<00:37, 37.69it/s]

Ep 23600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 95%|█████████▍| 23704/25000 [08:52<00:30, 42.52it/s]

Ep 23700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 95%|█████████▌| 23809/25000 [08:55<00:25, 46.80it/s]

Ep 23800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 96%|█████████▌| 23909/25000 [08:57<00:23, 47.40it/s]

Ep 23900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 96%|█████████▌| 24009/25000 [08:59<00:20, 47.59it/s]

Ep 24000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 96%|█████████▋| 24105/25000 [09:01<00:18, 47.36it/s]

Ep 24100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 97%|█████████▋| 24205/25000 [09:03<00:21, 37.84it/s]

Ep 24200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 97%|█████████▋| 24309/25000 [09:06<00:16, 42.22it/s]

Ep 24300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 98%|█████████▊| 24405/25000 [09:08<00:12, 46.77it/s]

Ep 24400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 98%|█████████▊| 24506/25000 [09:10<00:10, 47.93it/s]

Ep 24500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 98%|█████████▊| 24607/25000 [09:12<00:08, 47.57it/s]

Ep 24600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 99%|█████████▉| 24707/25000 [09:14<00:06, 47.23it/s]

Ep 24700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 99%|█████████▉| 24806/25000 [09:17<00:05, 36.33it/s]

Ep 24800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


100%|█████████▉| 24905/25000 [09:19<00:02, 41.32it/s]

Ep 24900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


100%|██████████| 25000/25000 [09:21<00:00, 44.49it/s]


Ep 25000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2

TEST:


 42%|████▏     | 125/300 [00:00<00:01, 149.10it/s]

Ep 100/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.8, First Action 2


 73%|███████▎  | 220/300 [00:01<00:00, 154.32it/s]

Ep 200/300, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


100%|██████████| 300/300 [00:02<00:00, 149.99it/s]


Ep 300/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2

GAMMA 0.9 - LR 0.01 - Entropy Decay True


  0%|          | 105/25000 [00:02<08:32, 48.55it/s]

Ep 100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  1%|          | 207/25000 [00:04<08:26, 48.93it/s]

Ep 200/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 17.9, First Action 10


  1%|          | 305/25000 [00:06<11:10, 36.80it/s]

Ep 300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


  2%|▏         | 405/25000 [00:09<09:17, 44.09it/s]

Ep 400/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.4, First Action 2


  2%|▏         | 505/25000 [00:11<08:30, 47.98it/s]

Ep 500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


  2%|▏         | 606/25000 [00:13<08:45, 46.43it/s]

Ep 600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


  3%|▎         | 708/25000 [00:15<08:36, 47.07it/s]

Ep 700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


  3%|▎         | 809/25000 [00:17<08:31, 47.33it/s]

Ep 800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


  4%|▎         | 904/25000 [00:20<10:34, 37.95it/s]

Ep 900/25000, Opt. Action: 3, Reward: 8.400000005960464, Cumulative-Regret: 16.599999994039536, AVG100-Regret: 18.1, First Action 8


  4%|▍         | 1006/25000 [00:22<08:52, 45.04it/s]

Ep 1000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


  4%|▍         | 1107/25000 [00:25<08:22, 47.55it/s]

Ep 1100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 0


  5%|▍         | 1210/25000 [00:27<08:02, 49.30it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 7


  5%|▌         | 1307/25000 [00:29<08:13, 47.98it/s]

Ep 1300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  6%|▌         | 1409/25000 [00:31<08:03, 48.75it/s]

Ep 1400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


  6%|▌         | 1504/25000 [00:33<10:03, 38.94it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  6%|▋         | 1608/25000 [00:36<08:50, 44.09it/s]

Ep 1600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


  7%|▋         | 1708/25000 [00:38<08:06, 47.88it/s]

Ep 1700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


  7%|▋         | 1808/25000 [00:40<08:17, 46.63it/s]

Ep 1800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


  8%|▊         | 1908/25000 [00:42<08:23, 45.86it/s]

Ep 1900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


  8%|▊         | 2008/25000 [00:44<08:04, 47.46it/s]

Ep 2000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


  8%|▊         | 2105/25000 [00:47<10:08, 37.63it/s]

Ep 2100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


  9%|▉         | 2206/25000 [00:50<08:17, 45.80it/s]

Ep 2200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 8


  9%|▉         | 2310/25000 [00:52<07:45, 48.77it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 12.600000023841858, Cumulative-Regret: 12.399999976158142, AVG100-Regret: 18.5, First Action 0


 10%|▉         | 2406/25000 [00:54<07:48, 48.25it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 10%|█         | 2508/25000 [00:56<07:40, 48.81it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 10%|█         | 2605/25000 [00:58<07:43, 48.29it/s]

Ep 2600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 11%|█         | 2707/25000 [01:00<10:02, 36.99it/s]

Ep 2700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 11%|█         | 2807/25000 [01:03<08:18, 44.49it/s]

Ep 2800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 1


 12%|█▏        | 2909/25000 [01:05<07:44, 47.51it/s]

Ep 2900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 12%|█▏        | 3009/25000 [01:07<07:45, 47.25it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 12%|█▏        | 3106/25000 [01:09<07:30, 48.60it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 8.5, Cumulative-Regret: 16.5, AVG100-Regret: 17.9, First Action 4


 13%|█▎        | 3208/25000 [01:11<07:31, 48.21it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 5


 13%|█▎        | 3307/25000 [01:14<09:26, 38.28it/s]

Ep 3300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 14%|█▎        | 3408/25000 [01:17<08:12, 43.86it/s]

Ep 3400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 14%|█▍        | 3508/25000 [01:19<07:23, 48.46it/s]

Ep 3500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 14%|█▍        | 3609/25000 [01:21<07:18, 48.73it/s]

Ep 3600/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


 15%|█▍        | 3706/25000 [01:23<07:20, 48.33it/s]

Ep 3700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 15%|█▌        | 3807/25000 [01:25<07:21, 48.02it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 8.799999952316284, Cumulative-Regret: 16.200000047683716, AVG100-Regret: 18.0, First Action 10


 16%|█▌        | 3907/25000 [01:27<09:23, 37.45it/s]

Ep 3900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 16%|█▌        | 4006/25000 [01:30<08:02, 43.53it/s]

Ep 4000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 16%|█▋        | 4107/25000 [01:32<07:17, 47.73it/s]

Ep 4100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 17%|█▋        | 4207/25000 [01:34<07:17, 47.56it/s]

Ep 4200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 17%|█▋        | 4310/25000 [01:36<07:06, 48.48it/s]

Ep 4300/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.0, First Action 6


 18%|█▊        | 4405/25000 [01:38<07:11, 47.68it/s]

Ep 4400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 18%|█▊        | 4505/25000 [01:41<09:14, 36.93it/s]

Ep 4500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 18%|█▊        | 4608/25000 [01:44<07:54, 42.96it/s]

Ep 4600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 8


 19%|█▉        | 4708/25000 [01:46<07:11, 47.07it/s]

Ep 4700/25000, Opt. Action: 8, Reward: 8.899999976158142, Cumulative-Regret: 16.100000023841858, AVG100-Regret: 17.8, First Action 8


 19%|█▉        | 4808/25000 [01:48<06:58, 48.26it/s]

Ep 4800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 20%|█▉        | 4909/25000 [01:50<07:00, 47.75it/s]

Ep 4900/25000, Opt. Action: 0, Reward: 4.100000001490116, Cumulative-Regret: 20.899999998509884, AVG100-Regret: 17.8, First Action 10


 20%|██        | 5006/25000 [01:52<06:58, 47.76it/s]

Ep 5000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 20%|██        | 5107/25000 [01:54<09:01, 36.71it/s]

Ep 5100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 21%|██        | 5207/25000 [01:57<07:44, 42.65it/s]

Ep 5200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 21%|██        | 5307/25000 [01:59<06:54, 47.55it/s]

Ep 5300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 22%|██▏       | 5407/25000 [02:01<06:53, 47.41it/s]

Ep 5400/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 17.9, First Action 1


 22%|██▏       | 5507/25000 [02:03<06:49, 47.57it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 22%|██▏       | 5607/25000 [02:06<06:59, 46.22it/s]

Ep 5600/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 17.8, First Action 0


 23%|██▎       | 5706/25000 [02:08<08:31, 37.75it/s]

Ep 5700/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.0, First Action 7


 23%|██▎       | 5805/25000 [02:11<07:58, 40.16it/s]

Ep 5800/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 18.3, First Action 5


 24%|██▎       | 5907/25000 [02:13<06:42, 47.46it/s]

Ep 5900/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.0, First Action 10


 24%|██▍       | 6005/25000 [02:15<06:31, 48.52it/s]

Ep 6000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 24%|██▍       | 6107/25000 [02:17<06:34, 47.84it/s]

Ep 6100/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.3, First Action 1


 25%|██▍       | 6209/25000 [02:19<06:43, 46.52it/s]

Ep 6200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 6


 25%|██▌       | 6306/25000 [02:21<08:23, 37.10it/s]

Ep 6300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 26%|██▌       | 6406/25000 [02:24<07:50, 39.55it/s]

Ep 6400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 26%|██▌       | 6506/25000 [02:26<06:35, 46.81it/s]

Ep 6500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 26%|██▋       | 6607/25000 [02:28<06:34, 46.62it/s]

Ep 6600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 27%|██▋       | 6709/25000 [02:30<06:35, 46.28it/s]

Ep 6700/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.1, First Action 8


 27%|██▋       | 6809/25000 [02:33<06:26, 47.08it/s]

Ep 6800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 28%|██▊       | 6905/25000 [02:35<07:47, 38.73it/s]

Ep 6900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 6


 28%|██▊       | 7005/25000 [02:38<08:20, 35.92it/s]

Ep 7000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 28%|██▊       | 7105/25000 [02:40<06:20, 46.99it/s]

Ep 7100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.2, First Action 9


 29%|██▉       | 7205/25000 [02:42<06:14, 47.55it/s]

Ep 7200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 29%|██▉       | 7306/25000 [02:44<06:11, 47.69it/s]

Ep 7300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 8


 30%|██▉       | 7407/25000 [02:46<06:05, 48.18it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 8.899999976158142, Cumulative-Regret: 16.100000023841858, AVG100-Regret: 17.6, First Action 0


 30%|███       | 7505/25000 [02:48<07:38, 38.17it/s]

Ep 7500/25000, Opt. Action: 3, Reward: 3.800000011920929, Cumulative-Regret: 21.19999998807907, AVG100-Regret: 17.0, First Action 8


 30%|███       | 7605/25000 [02:51<08:05, 35.82it/s]

Ep 7600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 31%|███       | 7708/25000 [02:53<06:13, 46.27it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 31%|███       | 7808/25000 [02:55<06:09, 46.58it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 8


 32%|███▏      | 7908/25000 [02:58<06:01, 47.24it/s]

Ep 7900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 0


 32%|███▏      | 8008/25000 [03:00<05:57, 47.57it/s]

Ep 8000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.0, First Action 8


 32%|███▏      | 8108/25000 [03:02<07:10, 39.28it/s]

Ep 8100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 8


 33%|███▎      | 8205/25000 [03:05<08:24, 33.27it/s]

Ep 8200/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 8


 33%|███▎      | 8308/25000 [03:07<05:47, 48.06it/s]

Ep 8300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 8


 34%|███▎      | 8405/25000 [03:09<05:48, 47.59it/s]

Ep 8400/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 8


 34%|███▍      | 8509/25000 [03:11<05:44, 47.89it/s]

Ep 8500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 8


 34%|███▍      | 8609/25000 [03:13<05:48, 47.01it/s]

Ep 8600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 8


 35%|███▍      | 8704/25000 [03:15<06:49, 39.77it/s]

Ep 8700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 8


 35%|███▌      | 8805/25000 [03:18<07:55, 34.07it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 8


 36%|███▌      | 8909/25000 [03:20<05:45, 46.51it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 8


 36%|███▌      | 9009/25000 [03:22<05:36, 47.46it/s]

Ep 9000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 0


 36%|███▋      | 9109/25000 [03:25<05:42, 46.42it/s]

Ep 9100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 8


 37%|███▋      | 9210/25000 [03:27<05:24, 48.59it/s]

Ep 9200/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 16.6, First Action 8


 37%|███▋      | 9306/25000 [03:29<06:03, 43.15it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8


 38%|███▊      | 9403/25000 [03:31<07:28, 34.79it/s]

Ep 9400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 7


 38%|███▊      | 9507/25000 [03:34<05:23, 47.84it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.8, First Action 8


 38%|███▊      | 9607/25000 [03:36<05:23, 47.56it/s]

Ep 9600/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 8


 39%|███▉      | 9708/25000 [03:38<05:19, 47.94it/s]

Ep 9700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 0


 39%|███▉      | 9808/25000 [03:40<05:29, 46.16it/s]

Ep 9800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 8


 40%|███▉      | 9908/25000 [03:42<05:46, 43.53it/s]

Ep 9900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 9


 40%|████      | 10006/25000 [03:45<07:05, 35.23it/s]

Ep 10000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 8


 40%|████      | 10108/25000 [03:47<05:20, 46.52it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 8


 41%|████      | 10209/25000 [03:50<05:14, 46.98it/s]

Ep 10200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 8


 41%|████      | 10309/25000 [03:52<05:03, 48.42it/s]

Ep 10300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 8


 42%|████▏     | 10409/25000 [03:54<05:00, 48.59it/s]

Ep 10400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 8


 42%|████▏     | 10509/25000 [03:56<04:59, 48.39it/s]

Ep 10500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.6, First Action 7


 42%|████▏     | 10603/25000 [03:58<06:58, 34.43it/s]

Ep 10600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 8


 43%|████▎     | 10709/25000 [04:01<04:58, 47.82it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.8, First Action 8


 43%|████▎     | 10809/25000 [04:03<04:52, 48.56it/s]

Ep 10800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 44%|████▎     | 10909/25000 [04:05<05:03, 46.40it/s]

Ep 10900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 8


 44%|████▍     | 11005/25000 [04:07<04:51, 48.02it/s]

Ep 11000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 44%|████▍     | 11106/25000 [04:09<04:50, 47.79it/s]

Ep 11100/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.4, First Action 8


 45%|████▍     | 11204/25000 [04:12<06:39, 34.50it/s]

Ep 11200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 8


 45%|████▌     | 11310/25000 [04:15<04:41, 48.70it/s]

Ep 11300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 46%|████▌     | 11406/25000 [04:17<04:46, 47.52it/s]

Ep 11400/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.9, First Action 2


 46%|████▌     | 11506/25000 [04:19<04:42, 47.78it/s]

Ep 11500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 8


 46%|████▋     | 11609/25000 [04:21<04:36, 48.51it/s]

Ep 11600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 3


 47%|████▋     | 11709/25000 [04:23<04:35, 48.24it/s]

Ep 11700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 5


 47%|████▋     | 11805/25000 [04:25<05:41, 38.62it/s]

Ep 11800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 8


 48%|████▊     | 11907/25000 [04:28<04:34, 47.70it/s]

Ep 11900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 8


 48%|████▊     | 12007/25000 [04:30<04:29, 48.24it/s]

Ep 12000/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 16.0, First Action 2


 48%|████▊     | 12107/25000 [04:32<04:36, 46.69it/s]

Ep 12100/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 8


 49%|████▉     | 12210/25000 [04:34<04:23, 48.48it/s]

Ep 12200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 49%|████▉     | 12308/25000 [04:36<04:20, 48.71it/s]

Ep 12300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 2


 50%|████▉     | 12405/25000 [04:39<05:56, 35.31it/s]

Ep 12400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 9


 50%|█████     | 12507/25000 [04:41<04:44, 43.93it/s]

Ep 12500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 8


 50%|█████     | 12607/25000 [04:44<04:19, 47.76it/s]

Ep 12600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 7


 51%|█████     | 12707/25000 [04:46<04:14, 48.30it/s]

Ep 12700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 8


 51%|█████     | 12807/25000 [04:48<04:14, 47.90it/s]

Ep 12800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 8


 52%|█████▏    | 12909/25000 [04:50<04:07, 48.81it/s]

Ep 12900/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.2, First Action 8


 52%|█████▏    | 13006/25000 [04:52<05:20, 37.37it/s]

Ep 13000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 8


 52%|█████▏    | 13108/25000 [04:55<04:10, 47.53it/s]

Ep 13100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 8


 53%|█████▎    | 13208/25000 [04:57<04:07, 47.66it/s]

Ep 13200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.5, First Action 8


 53%|█████▎    | 13308/25000 [04:59<04:04, 47.74it/s]

Ep 13300/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.2, First Action 8


 54%|█████▎    | 13409/25000 [05:01<04:06, 46.95it/s]

Ep 13400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 9


 54%|█████▍    | 13509/25000 [05:03<04:03, 47.09it/s]

Ep 13500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 54%|█████▍    | 13606/25000 [05:06<05:07, 37.03it/s]

Ep 13600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


 55%|█████▍    | 13706/25000 [05:09<04:03, 46.43it/s]

Ep 13700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8


 55%|█████▌    | 13807/25000 [05:11<03:54, 47.70it/s]

Ep 13800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 56%|█████▌    | 13907/25000 [05:13<03:51, 47.99it/s]

Ep 13900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 56%|█████▌    | 14007/25000 [05:15<03:48, 48.20it/s]

Ep 14000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 8


 56%|█████▋    | 14107/25000 [05:17<03:45, 48.24it/s]

Ep 14100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 57%|█████▋    | 14207/25000 [05:20<04:56, 36.35it/s]

Ep 14200/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 2


 57%|█████▋    | 14308/25000 [05:22<03:53, 45.74it/s]

Ep 14300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.0, First Action 1


 58%|█████▊    | 14409/25000 [05:24<03:39, 48.35it/s]

Ep 14400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 0


 58%|█████▊    | 14509/25000 [05:26<03:40, 47.51it/s]

Ep 14500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 5


 58%|█████▊    | 14609/25000 [05:28<03:40, 47.14it/s]

Ep 14600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 2


 59%|█████▉    | 14709/25000 [05:31<03:32, 48.33it/s]

Ep 14700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 9


 59%|█████▉    | 14807/25000 [05:33<04:34, 37.13it/s]

Ep 14800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 9


 60%|█████▉    | 14906/25000 [05:36<03:38, 46.27it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.5, First Action 9


 60%|██████    | 15007/25000 [05:38<03:28, 47.84it/s]

Ep 15000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 60%|██████    | 15108/25000 [05:40<03:26, 47.99it/s]

Ep 15100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.0, First Action 9


 61%|██████    | 15208/25000 [05:42<03:49, 42.63it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 61%|██████    | 15308/25000 [05:44<03:31, 45.76it/s]

Ep 15300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 5


 62%|██████▏   | 15405/25000 [05:47<04:22, 36.57it/s]

Ep 15400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 62%|██████▏   | 15509/25000 [05:50<03:30, 45.02it/s]

Ep 15500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 62%|██████▏   | 15609/25000 [05:52<03:22, 46.40it/s]

Ep 15600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.1, First Action 4


 63%|██████▎   | 15709/25000 [05:54<03:18, 46.75it/s]

Ep 15700/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.9, First Action 5


 63%|██████▎   | 15809/25000 [05:56<03:14, 47.23it/s]

Ep 15800/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.0, First Action 1


 64%|██████▎   | 15909/25000 [05:58<03:18, 45.80it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 64%|██████▍   | 16005/25000 [06:01<03:56, 37.99it/s]

Ep 16000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 64%|██████▍   | 16106/25000 [06:03<03:14, 45.69it/s]

Ep 16100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 4


 65%|██████▍   | 16206/25000 [06:05<03:06, 47.25it/s]

Ep 16200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 2


 65%|██████▌   | 16307/25000 [06:08<02:59, 48.32it/s]

Ep 16300/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 5


 66%|██████▌   | 16408/25000 [06:10<02:59, 47.79it/s]

Ep 16400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 4


 66%|██████▌   | 16509/25000 [06:12<02:55, 48.32it/s]

Ep 16500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 66%|██████▋   | 16603/25000 [06:14<04:01, 34.70it/s]

Ep 16600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 8


 67%|██████▋   | 16709/25000 [06:17<02:53, 47.72it/s]

Ep 16700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 5


 67%|██████▋   | 16809/25000 [06:19<02:52, 47.61it/s]

Ep 16800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 1


 68%|██████▊   | 16905/25000 [06:21<02:52, 46.98it/s]

Ep 16900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 68%|██████▊   | 17005/25000 [06:23<02:48, 47.32it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 5


 68%|██████▊   | 17105/25000 [06:25<02:46, 47.39it/s]

Ep 17100/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 5


 69%|██████▉   | 17207/25000 [06:28<03:27, 37.50it/s]

Ep 17200/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.1, First Action 8


 69%|██████▉   | 17307/25000 [06:31<02:43, 47.19it/s]

Ep 17300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 4


 70%|██████▉   | 17407/25000 [06:33<02:41, 46.98it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 5


 70%|███████   | 17507/25000 [06:35<02:40, 46.76it/s]

Ep 17500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 1


 70%|███████   | 17607/25000 [06:37<02:36, 47.17it/s]

Ep 17600/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.0, First Action 0


 71%|███████   | 17707/25000 [06:39<02:32, 47.71it/s]

Ep 17700/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 9


 71%|███████   | 17806/25000 [06:42<03:15, 36.72it/s]

Ep 17800/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.5, First Action 2


 72%|███████▏  | 17905/25000 [06:44<02:41, 44.00it/s]

Ep 17900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 9


 72%|███████▏  | 18006/25000 [06:46<02:32, 45.87it/s]

Ep 18000/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.5, First Action 5


 72%|███████▏  | 18108/25000 [06:48<02:23, 48.02it/s]

Ep 18100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 73%|███████▎  | 18208/25000 [06:51<02:26, 46.35it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 0


 73%|███████▎  | 18308/25000 [06:53<02:22, 47.04it/s]

Ep 18300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 0


 74%|███████▎  | 18405/25000 [06:55<03:03, 35.90it/s]

Ep 18400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 8


 74%|███████▍  | 18506/25000 [06:58<02:20, 46.17it/s]

Ep 18500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 5


 74%|███████▍  | 18606/25000 [07:00<02:15, 47.18it/s]

Ep 18600/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.4, First Action 1


 75%|███████▍  | 18706/25000 [07:02<02:13, 46.98it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 75%|███████▌  | 18806/25000 [07:04<02:15, 45.64it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 5


 76%|███████▌  | 18906/25000 [07:06<02:08, 47.60it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 0


 76%|███████▌  | 19007/25000 [07:09<02:40, 37.26it/s]

Ep 19000/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.7, First Action 8


 76%|███████▋  | 19105/25000 [07:12<02:06, 46.74it/s]

Ep 19100/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.6, First Action 5


 77%|███████▋  | 19205/25000 [07:14<02:04, 46.59it/s]

Ep 19200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 4


 77%|███████▋  | 19305/25000 [07:16<02:02, 46.40it/s]

Ep 19300/25000, Opt. Action: 9, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.8, First Action 4


 78%|███████▊  | 19405/25000 [07:18<01:57, 47.48it/s]

Ep 19400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 0


 78%|███████▊  | 19505/25000 [07:20<01:59, 46.12it/s]

Ep 19500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 2


 78%|███████▊  | 19606/25000 [07:23<02:37, 34.20it/s]

Ep 19600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 79%|███████▉  | 19705/25000 [07:25<01:54, 46.25it/s]

Ep 19700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 8


 79%|███████▉  | 19805/25000 [07:28<01:49, 47.31it/s]

Ep 19800/25000, Opt. Action: 4, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 0


 80%|███████▉  | 19905/25000 [07:30<01:47, 47.50it/s]

Ep 19900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 8


 80%|████████  | 20005/25000 [07:32<01:45, 47.24it/s]

Ep 20000/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 0


 80%|████████  | 20106/25000 [07:34<01:44, 46.64it/s]

Ep 20100/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.1, First Action 3


 81%|████████  | 20207/25000 [07:37<02:15, 35.45it/s]

Ep 20200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 4


 81%|████████  | 20308/25000 [07:39<01:45, 44.62it/s]

Ep 20300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 82%|████████▏ | 20405/25000 [07:41<01:41, 45.10it/s]

Ep 20400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 82%|████████▏ | 20505/25000 [07:43<01:40, 44.81it/s]

Ep 20500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 4


 82%|████████▏ | 20608/25000 [07:45<01:32, 47.66it/s]

Ep 20600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.2, First Action 5


 83%|████████▎ | 20708/25000 [07:48<01:31, 47.15it/s]

Ep 20700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 0


 83%|████████▎ | 20804/25000 [07:50<01:55, 36.36it/s]

Ep 20800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 2


 84%|████████▎ | 20906/25000 [07:53<01:26, 47.48it/s]

Ep 20900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 0


 84%|████████▍ | 21004/25000 [07:55<01:23, 48.00it/s]

Ep 21000/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.2, First Action 0


 84%|████████▍ | 21105/25000 [07:57<01:22, 47.03it/s]

Ep 21100/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 2


 85%|████████▍ | 21205/25000 [07:59<01:19, 47.97it/s]

Ep 21200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 85%|████████▌ | 21305/25000 [08:01<01:17, 47.44it/s]

Ep 21300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 9


 86%|████████▌ | 21404/25000 [08:04<01:38, 36.68it/s]

Ep 21400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 0


 86%|████████▌ | 21509/25000 [08:06<01:14, 46.76it/s]

Ep 21500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.9, First Action 1


 86%|████████▋ | 21605/25000 [08:08<01:14, 45.84it/s]

Ep 21600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 2


 87%|████████▋ | 21705/25000 [08:10<01:10, 46.83it/s]

Ep 21700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 87%|████████▋ | 21805/25000 [08:13<01:07, 47.19it/s]

Ep 21800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 0


 88%|████████▊ | 21905/25000 [08:15<01:04, 47.64it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 8


 88%|████████▊ | 22006/25000 [08:17<01:23, 35.77it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 4


 88%|████████▊ | 22109/25000 [08:20<01:01, 47.38it/s]

Ep 22100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 89%|████████▉ | 22209/25000 [08:22<00:59, 47.10it/s]

Ep 22200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 89%|████████▉ | 22309/25000 [08:24<00:58, 46.27it/s]

Ep 22300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 4


 90%|████████▉ | 22409/25000 [08:26<00:54, 47.13it/s]

Ep 22400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 4


 90%|█████████ | 22509/25000 [08:28<00:52, 47.51it/s]

Ep 22500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 4


 90%|█████████ | 22604/25000 [08:31<01:00, 39.37it/s]

Ep 22600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 4


 91%|█████████ | 22706/25000 [08:33<00:49, 46.81it/s]

Ep 22700/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.9, First Action 4


 91%|█████████ | 22806/25000 [08:36<00:45, 48.26it/s]

Ep 22800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 0


 92%|█████████▏| 22905/25000 [08:38<00:43, 48.46it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 8


 92%|█████████▏| 23005/25000 [08:40<00:41, 47.59it/s]

Ep 23000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 8


 92%|█████████▏| 23107/25000 [08:42<00:38, 48.94it/s]

Ep 23100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 8


 93%|█████████▎| 23206/25000 [08:44<00:50, 35.81it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 4


 93%|█████████▎| 23306/25000 [08:47<00:36, 46.94it/s]

Ep 23300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 8


 94%|█████████▎| 23406/25000 [08:49<00:33, 47.55it/s]

Ep 23400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 8


 94%|█████████▍| 23506/25000 [08:51<00:31, 46.87it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.6, First Action 8


 94%|█████████▍| 23606/25000 [08:53<00:29, 47.70it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.3, First Action 4


 95%|█████████▍| 23707/25000 [08:55<00:27, 47.85it/s]

Ep 23700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 4


 95%|█████████▌| 23808/25000 [08:58<00:30, 38.70it/s]

Ep 23800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.1, First Action 4


 96%|█████████▌| 23907/25000 [09:01<00:23, 46.64it/s]

Ep 23900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 4


 96%|█████████▌| 24007/25000 [09:03<00:20, 48.04it/s]

Ep 24000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 96%|█████████▋| 24107/25000 [09:05<00:18, 47.81it/s]

Ep 24100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 97%|█████████▋| 24207/25000 [09:07<00:16, 47.96it/s]

Ep 24200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 97%|█████████▋| 24309/25000 [09:09<00:14, 47.33it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 4


 98%|█████████▊| 24406/25000 [09:12<00:15, 37.47it/s]

Ep 24400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 98%|█████████▊| 24508/25000 [09:14<00:11, 44.21it/s]

Ep 24500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 4


 98%|█████████▊| 24607/25000 [09:16<00:08, 47.55it/s]

Ep 24600/25000, Opt. Action: 4, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.4, First Action 8


 99%|█████████▉| 24707/25000 [09:18<00:06, 47.14it/s]

Ep 24700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 4


 99%|█████████▉| 24805/25000 [09:20<00:04, 47.98it/s]

Ep 24800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


100%|█████████▉| 24905/25000 [09:23<00:02, 47.30it/s]

Ep 24900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


100%|██████████| 25000/25000 [09:25<00:00, 44.22it/s]


Ep 25000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4

TEST:


 39%|███▊      | 116/300 [00:01<00:01, 107.17it/s]

Ep 100/300, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 76%|███████▌  | 228/300 [00:02<00:00, 131.25it/s]

Ep 200/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


100%|██████████| 300/300 [00:02<00:00, 119.57it/s]


Ep 300/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 4

GAMMA 1 - LR 0.0001 - Entropy Decay False


  0%|          | 105/25000 [00:02<08:44, 47.47it/s]

Ep 100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


  1%|          | 207/25000 [00:04<08:44, 47.29it/s]

Ep 200/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 18.4, First Action 10


  1%|          | 307/25000 [00:06<08:48, 46.69it/s]

Ep 300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 8


  2%|▏         | 408/25000 [00:08<08:23, 48.85it/s]

Ep 400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


  2%|▏         | 505/25000 [00:11<11:08, 36.63it/s]

Ep 500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


  2%|▏         | 605/25000 [00:13<08:41, 46.75it/s]

Ep 600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 6


  3%|▎         | 707/25000 [00:15<08:31, 47.48it/s]

Ep 700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


  3%|▎         | 807/25000 [00:17<08:21, 48.27it/s]

Ep 800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


  4%|▎         | 908/25000 [00:20<08:21, 48.08it/s]

Ep 900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


  4%|▍         | 1009/25000 [00:22<08:18, 48.12it/s]

Ep 1000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


  4%|▍         | 1104/25000 [00:24<10:21, 38.47it/s]

Ep 1100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


  5%|▍         | 1209/25000 [00:27<08:22, 47.32it/s]

Ep 1200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


  5%|▌         | 1309/25000 [00:29<08:28, 46.58it/s]

Ep 1300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


  6%|▌         | 1406/25000 [00:31<08:25, 46.63it/s]

Ep 1400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


  6%|▌         | 1508/25000 [00:33<08:06, 48.24it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.1, First Action 10


  6%|▋         | 1609/25000 [00:35<08:11, 47.58it/s]

Ep 1600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 2


  7%|▋         | 1704/25000 [00:38<10:12, 38.03it/s]

Ep 1700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


  7%|▋         | 1805/25000 [00:40<08:30, 45.42it/s]

Ep 1800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 2


  8%|▊         | 1907/25000 [00:42<07:49, 49.14it/s]

Ep 1900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


  8%|▊         | 2008/25000 [00:44<07:57, 48.16it/s]

Ep 2000/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


  8%|▊         | 2109/25000 [00:47<08:01, 47.51it/s]

Ep 2100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


  9%|▉         | 2208/25000 [00:49<07:51, 48.34it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 0


  9%|▉         | 2307/25000 [00:51<10:11, 37.13it/s]

Ep 2300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 10%|▉         | 2409/25000 [00:54<08:14, 45.65it/s]

Ep 2400/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 0


 10%|█         | 2510/25000 [00:56<07:44, 48.40it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 10%|█         | 2606/25000 [00:58<07:40, 48.59it/s]

Ep 2600/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.3, First Action 10


 11%|█         | 2706/25000 [01:00<07:47, 47.70it/s]

Ep 2700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 11%|█         | 2807/25000 [01:02<07:37, 48.51it/s]

Ep 2800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 12%|█▏        | 2906/25000 [01:05<09:59, 36.84it/s]

Ep 2900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 12%|█▏        | 3006/25000 [01:07<08:13, 44.59it/s]

Ep 3000/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 12%|█▏        | 3106/25000 [01:09<07:31, 48.48it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 6


 13%|█▎        | 3206/25000 [01:12<07:31, 48.31it/s]

Ep 3200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 13%|█▎        | 3307/25000 [01:14<07:26, 48.63it/s]

Ep 3300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 14%|█▎        | 3408/25000 [01:16<07:45, 46.40it/s]

Ep 3400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 6


 14%|█▍        | 3504/25000 [01:18<09:31, 37.63it/s]

Ep 3500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 0


 14%|█▍        | 3609/25000 [01:21<08:06, 43.92it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 15%|█▍        | 3706/25000 [01:23<07:33, 46.99it/s]

Ep 3700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 15%|█▌        | 3806/25000 [01:25<07:28, 47.22it/s]

Ep 3800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 2


 16%|█▌        | 3906/25000 [01:27<07:39, 45.93it/s]

Ep 3900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 4


 16%|█▌        | 4007/25000 [01:29<07:25, 47.13it/s]

Ep 4000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 6


 16%|█▋        | 4105/25000 [01:32<09:16, 37.56it/s]

Ep 4100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 17%|█▋        | 4205/25000 [01:35<08:08, 42.56it/s]

Ep 4200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 17%|█▋        | 4305/25000 [01:37<07:27, 46.27it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 18%|█▊        | 4406/25000 [01:39<07:23, 46.40it/s]

Ep 4400/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 0


 18%|█▊        | 4507/25000 [01:41<07:17, 46.84it/s]

Ep 4500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 18%|█▊        | 4607/25000 [01:43<07:16, 46.74it/s]

Ep 4600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 4


 19%|█▉        | 4705/25000 [01:46<08:54, 37.97it/s]

Ep 4700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 19%|█▉        | 4807/25000 [01:48<07:30, 44.78it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 20%|█▉        | 4909/25000 [01:50<06:47, 49.30it/s]

Ep 4900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


 20%|██        | 5009/25000 [01:53<07:00, 47.56it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 20%|██        | 5105/25000 [01:55<06:54, 48.01it/s]

Ep 5100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 21%|██        | 5205/25000 [01:57<06:47, 48.58it/s]

Ep 5200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 21%|██        | 5307/25000 [01:59<08:57, 36.61it/s]

Ep 5300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 22%|██▏       | 5407/25000 [02:02<07:31, 43.40it/s]

Ep 5400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 22%|██▏       | 5507/25000 [02:04<06:53, 47.18it/s]

Ep 5500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 22%|██▏       | 5608/25000 [02:06<06:49, 47.40it/s]

Ep 5600/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 0


 23%|██▎       | 5709/25000 [02:08<06:40, 48.22it/s]

Ep 5700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 23%|██▎       | 5806/25000 [02:10<06:36, 48.39it/s]

Ep 5800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 24%|██▎       | 5905/25000 [02:13<08:26, 37.68it/s]

Ep 5900/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 3


 24%|██▍       | 6006/25000 [02:15<07:19, 43.23it/s]

Ep 6000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 24%|██▍       | 6106/25000 [02:18<06:47, 46.34it/s]

Ep 6100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 25%|██▍       | 6208/25000 [02:20<06:28, 48.38it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 25%|██▌       | 6308/25000 [02:22<06:36, 47.20it/s]

Ep 6300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 7


 26%|██▌       | 6405/25000 [02:24<06:35, 47.07it/s]

Ep 6400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 26%|██▌       | 6507/25000 [02:26<08:25, 36.62it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 26%|██▋       | 6609/25000 [02:29<07:15, 42.22it/s]

Ep 6600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 27%|██▋       | 6706/25000 [02:31<06:14, 48.88it/s]

Ep 6700/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 0


 27%|██▋       | 6807/25000 [02:33<06:10, 49.10it/s]

Ep 6800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 28%|██▊       | 6907/25000 [02:35<06:26, 46.80it/s]

Ep 6900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 8


 28%|██▊       | 7007/25000 [02:37<06:15, 47.96it/s]

Ep 7000/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 28%|██▊       | 7106/25000 [02:40<08:08, 36.65it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.5, First Action 5


 29%|██▉       | 7206/25000 [02:43<08:49, 33.62it/s]

Ep 7200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


 29%|██▉       | 7307/25000 [02:45<06:08, 47.96it/s]

Ep 7300/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 30%|██▉       | 7408/25000 [02:47<06:14, 46.98it/s]

Ep 7400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


 30%|███       | 7508/25000 [02:49<06:26, 45.28it/s]

Ep 7500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 30%|███       | 7608/25000 [02:51<06:13, 46.53it/s]

Ep 7600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 31%|███       | 7705/25000 [02:53<07:43, 37.34it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 31%|███       | 7806/25000 [02:56<08:12, 34.90it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 32%|███▏      | 7905/25000 [02:58<05:59, 47.54it/s]

Ep 7900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 32%|███▏      | 8005/25000 [03:00<06:01, 46.98it/s]

Ep 8000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 32%|███▏      | 8107/25000 [03:03<06:03, 46.49it/s]

Ep 8100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 0


 33%|███▎      | 8207/25000 [03:05<05:59, 46.74it/s]

Ep 8200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 33%|███▎      | 8307/25000 [03:07<07:14, 38.39it/s]

Ep 8300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 34%|███▎      | 8404/25000 [03:10<08:22, 33.01it/s]

Ep 8400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 6


 34%|███▍      | 8506/25000 [03:12<05:53, 46.63it/s]

Ep 8500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 34%|███▍      | 8609/25000 [03:14<05:47, 47.19it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 35%|███▍      | 8709/25000 [03:16<05:51, 46.40it/s]

Ep 8700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 35%|███▌      | 8806/25000 [03:18<05:52, 45.98it/s]

Ep 8800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 36%|███▌      | 8906/25000 [03:20<06:43, 39.90it/s]

Ep 8900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 36%|███▌      | 9003/25000 [03:23<07:45, 34.37it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 7


 36%|███▋      | 9105/25000 [03:25<05:26, 48.64it/s]

Ep 9100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 37%|███▋      | 9205/25000 [03:28<05:31, 47.59it/s]

Ep 9200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 37%|███▋      | 9306/25000 [03:30<05:26, 48.07it/s]

Ep 9300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 38%|███▊      | 9406/25000 [03:32<05:34, 46.68it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 38%|███▊      | 9503/25000 [03:34<05:46, 44.67it/s]

Ep 9500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 38%|███▊      | 9604/25000 [03:37<07:12, 35.60it/s]

Ep 9600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 39%|███▉      | 9707/25000 [03:39<05:19, 47.92it/s]

Ep 9700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 39%|███▉      | 9807/25000 [03:41<05:16, 48.07it/s]

Ep 9800/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 7


 40%|███▉      | 9907/25000 [03:43<05:18, 47.43it/s]

Ep 9900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 40%|████      | 10007/25000 [03:45<05:10, 48.26it/s]

Ep 10000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 40%|████      | 10107/25000 [03:48<05:10, 47.93it/s]

Ep 10100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 8


 41%|████      | 10205/25000 [03:50<07:14, 34.06it/s]

Ep 10200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 7


 41%|████      | 10305/25000 [03:53<05:04, 48.30it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 42%|████▏     | 10406/25000 [03:55<05:11, 46.86it/s]

Ep 10400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 42%|████▏     | 10506/25000 [03:57<05:13, 46.23it/s]

Ep 10500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 42%|████▏     | 10606/25000 [03:59<05:06, 46.94it/s]

Ep 10600/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 7


 43%|████▎     | 10707/25000 [04:01<04:54, 48.49it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 43%|████▎     | 10804/25000 [04:04<06:06, 38.77it/s]

Ep 10800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 44%|████▎     | 10906/25000 [04:06<05:00, 46.86it/s]

Ep 10900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 44%|████▍     | 11006/25000 [04:08<04:55, 47.30it/s]

Ep 11000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 44%|████▍     | 11106/25000 [04:10<04:50, 47.88it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 0


 45%|████▍     | 11206/25000 [04:13<04:49, 47.68it/s]

Ep 11200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 45%|████▌     | 11306/25000 [04:15<04:44, 48.17it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 0


 46%|████▌     | 11406/25000 [04:17<06:00, 37.75it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


 46%|████▌     | 11507/25000 [04:20<04:49, 46.65it/s]

Ep 11500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 7


 46%|████▋     | 11609/25000 [04:22<04:48, 46.49it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 47%|████▋     | 11705/25000 [04:24<04:37, 47.89it/s]

Ep 11700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 47%|████▋     | 11806/25000 [04:26<04:34, 48.03it/s]

Ep 11800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 6


 48%|████▊     | 11908/25000 [04:28<04:26, 49.18it/s]

Ep 11900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 3


 48%|████▊     | 12005/25000 [04:31<05:53, 36.72it/s]

Ep 12000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 48%|████▊     | 12109/25000 [04:34<04:35, 46.81it/s]

Ep 12100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 49%|████▉     | 12209/25000 [04:36<04:26, 48.07it/s]

Ep 12200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 49%|████▉     | 12309/25000 [04:38<04:28, 47.26it/s]

Ep 12300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 50%|████▉     | 12405/25000 [04:40<04:27, 47.01it/s]

Ep 12400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 50%|█████     | 12506/25000 [04:42<04:27, 46.62it/s]

Ep 12500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 50%|█████     | 12607/25000 [04:45<05:36, 36.79it/s]

Ep 12600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


 51%|█████     | 12706/25000 [04:47<04:25, 46.26it/s]

Ep 12700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 51%|█████     | 12807/25000 [04:49<04:28, 45.39it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 52%|█████▏    | 12907/25000 [04:51<04:19, 46.64it/s]

Ep 12900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 52%|█████▏    | 13007/25000 [04:54<04:15, 46.95it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 52%|█████▏    | 13107/25000 [04:56<04:18, 46.03it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 53%|█████▎    | 13203/25000 [04:58<05:17, 37.18it/s]

Ep 13200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 53%|█████▎    | 13309/25000 [05:01<04:15, 45.72it/s]

Ep 13300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 54%|█████▎    | 13409/25000 [05:03<04:07, 46.77it/s]

Ep 13400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 54%|█████▍    | 13509/25000 [05:05<04:04, 47.04it/s]

Ep 13500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 54%|█████▍    | 13609/25000 [05:07<03:57, 48.03it/s]

Ep 13600/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.3, First Action 7


 55%|█████▍    | 13709/25000 [05:10<03:56, 47.82it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 55%|█████▌    | 13805/25000 [05:12<04:56, 37.77it/s]

Ep 13800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 56%|█████▌    | 13908/25000 [05:15<03:54, 47.31it/s]

Ep 13900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 56%|█████▌    | 14008/25000 [05:17<03:55, 46.76it/s]

Ep 14000/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 8


 56%|█████▋    | 14109/25000 [05:19<03:53, 46.65it/s]

Ep 14100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 57%|█████▋    | 14210/25000 [05:21<03:41, 48.71it/s]

Ep 14200/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 0


 57%|█████▋    | 14305/25000 [05:23<03:56, 45.24it/s]

Ep 14300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 58%|█████▊    | 14405/25000 [05:26<04:56, 35.71it/s]

Ep 14400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 58%|█████▊    | 14507/25000 [05:28<03:42, 47.20it/s]

Ep 14500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 58%|█████▊    | 14607/25000 [05:30<03:38, 47.51it/s]

Ep 14600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 7


 59%|█████▉    | 14709/25000 [05:33<03:36, 47.59it/s]

Ep 14700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 59%|█████▉    | 14809/25000 [05:35<03:34, 47.43it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 5


 60%|█████▉    | 14909/25000 [05:37<03:34, 47.06it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 60%|██████    | 15005/25000 [05:39<04:36, 36.21it/s]

Ep 15000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 6


 60%|██████    | 15107/25000 [05:42<03:29, 47.18it/s]

Ep 15100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 61%|██████    | 15207/25000 [05:44<03:25, 47.61it/s]

Ep 15200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 61%|██████    | 15308/25000 [05:46<03:24, 47.46it/s]

Ep 15300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 62%|██████▏   | 15409/25000 [05:48<03:24, 46.92it/s]

Ep 15400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 62%|██████▏   | 15509/25000 [05:51<03:19, 47.55it/s]

Ep 15500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


 62%|██████▏   | 15608/25000 [05:53<04:06, 38.06it/s]

Ep 15600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 63%|██████▎   | 15709/25000 [05:56<03:23, 45.56it/s]

Ep 15700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 63%|██████▎   | 15804/25000 [05:58<03:18, 46.40it/s]

Ep 15800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 64%|██████▎   | 15909/25000 [06:00<03:17, 46.10it/s]

Ep 15900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 64%|██████▍   | 16009/25000 [06:02<03:13, 46.58it/s]

Ep 16000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 64%|██████▍   | 16109/25000 [06:04<03:05, 47.84it/s]

Ep 16100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 65%|██████▍   | 16206/25000 [06:07<03:59, 36.78it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 65%|██████▌   | 16307/25000 [06:09<03:10, 45.56it/s]

Ep 16300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 8


 66%|██████▌   | 16408/25000 [06:11<02:59, 47.76it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 66%|██████▌   | 16509/25000 [06:14<02:55, 48.29it/s]

Ep 16500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 0


 66%|██████▋   | 16609/25000 [06:16<02:55, 47.76it/s]

Ep 16600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 67%|██████▋   | 16709/25000 [06:18<02:56, 47.01it/s]

Ep 16700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 67%|██████▋   | 16803/25000 [06:20<03:39, 37.42it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 68%|██████▊   | 16909/25000 [06:23<02:56, 45.88it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 68%|██████▊   | 17009/25000 [06:25<02:48, 47.32it/s]

Ep 17000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 68%|██████▊   | 17109/25000 [06:27<02:46, 47.37it/s]

Ep 17100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 69%|██████▉   | 17209/25000 [06:29<02:43, 47.53it/s]

Ep 17200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.2, First Action 4


 69%|██████▉   | 17309/25000 [06:31<02:41, 47.70it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 0


 70%|██████▉   | 17405/25000 [06:34<03:29, 36.20it/s]

Ep 17400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 70%|███████   | 17508/25000 [06:37<02:41, 46.33it/s]

Ep 17500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 70%|███████   | 17608/25000 [06:39<02:39, 46.36it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 71%|███████   | 17708/25000 [06:41<02:35, 46.96it/s]

Ep 17700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 71%|███████   | 17808/25000 [06:43<02:32, 47.10it/s]

Ep 17800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 0


 72%|███████▏  | 17908/25000 [06:45<02:37, 45.03it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 7


 72%|███████▏  | 18006/25000 [06:48<03:11, 36.59it/s]

Ep 18000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 72%|███████▏  | 18105/25000 [06:50<02:35, 44.23it/s]

Ep 18100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 73%|███████▎  | 18205/25000 [06:52<02:25, 46.58it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 73%|███████▎  | 18305/25000 [06:55<02:26, 45.77it/s]

Ep 18300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 74%|███████▎  | 18405/25000 [06:57<02:22, 46.25it/s]

Ep 18400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 74%|███████▍  | 18505/25000 [06:59<02:19, 46.68it/s]

Ep 18500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 74%|███████▍  | 18605/25000 [07:01<02:44, 38.80it/s]

Ep 18600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 75%|███████▍  | 18709/25000 [07:04<02:16, 46.09it/s]

Ep 18700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


 75%|███████▌  | 18809/25000 [07:06<02:11, 47.09it/s]

Ep 18800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 76%|███████▌  | 18909/25000 [07:08<02:10, 46.76it/s]

Ep 18900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


 76%|███████▌  | 19009/25000 [07:10<02:06, 47.48it/s]

Ep 19000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 76%|███████▋  | 19109/25000 [07:13<02:03, 47.82it/s]

Ep 19100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 77%|███████▋  | 19206/25000 [07:15<02:35, 37.32it/s]

Ep 19200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 77%|███████▋  | 19309/25000 [07:18<02:03, 46.17it/s]

Ep 19300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 0


 78%|███████▊  | 19409/25000 [07:20<02:00, 46.39it/s]

Ep 19400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 78%|███████▊  | 19509/25000 [07:22<01:56, 47.05it/s]

Ep 19500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.6, First Action 8


 78%|███████▊  | 19609/25000 [07:24<01:54, 47.19it/s]

Ep 19600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 79%|███████▉  | 19709/25000 [07:26<01:50, 47.70it/s]

Ep 19700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 79%|███████▉  | 19806/25000 [07:29<02:19, 37.30it/s]

Ep 19800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 80%|███████▉  | 19906/25000 [07:31<01:50, 46.22it/s]

Ep 19900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 80%|████████  | 20006/25000 [07:33<01:43, 48.10it/s]

Ep 20000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 80%|████████  | 20106/25000 [07:36<01:42, 47.84it/s]

Ep 20100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 81%|████████  | 20206/25000 [07:38<01:40, 47.65it/s]

Ep 20200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 81%|████████  | 20307/25000 [07:40<01:40, 46.79it/s]

Ep 20300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 82%|████████▏ | 20406/25000 [07:42<02:00, 38.07it/s]

Ep 20400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 82%|████████▏ | 20508/25000 [07:45<01:38, 45.49it/s]

Ep 20500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 82%|████████▏ | 20608/25000 [07:47<01:32, 47.50it/s]

Ep 20600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 83%|████████▎ | 20708/25000 [07:49<01:29, 47.77it/s]

Ep 20700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 83%|████████▎ | 20808/25000 [07:51<01:27, 47.68it/s]

Ep 20800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 84%|████████▎ | 20905/25000 [07:53<01:23, 49.22it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 84%|████████▍ | 21006/25000 [07:56<01:41, 39.41it/s]

Ep 21000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 84%|████████▍ | 21109/25000 [07:59<01:28, 43.78it/s]

Ep 21100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 85%|████████▍ | 21209/25000 [08:01<01:23, 45.46it/s]

Ep 21200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 85%|████████▌ | 21309/25000 [08:03<01:20, 45.91it/s]

Ep 21300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 86%|████████▌ | 21409/25000 [08:05<01:17, 46.25it/s]

Ep 21400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 86%|████████▌ | 21509/25000 [08:07<01:14, 46.58it/s]

Ep 21500/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 4


 86%|████████▋ | 21607/25000 [08:10<01:30, 37.66it/s]

Ep 21600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 0


 87%|████████▋ | 21709/25000 [08:12<01:18, 41.76it/s]

Ep 21700/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 0


 87%|████████▋ | 21809/25000 [08:15<01:08, 46.83it/s]

Ep 21800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 88%|████████▊ | 21909/25000 [08:17<01:05, 47.35it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 88%|████████▊ | 22009/25000 [08:19<01:05, 45.72it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 88%|████████▊ | 22104/25000 [08:21<01:03, 45.60it/s]

Ep 22100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 89%|████████▉ | 22207/25000 [08:23<01:12, 38.56it/s]

Ep 22200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 89%|████████▉ | 22309/25000 [08:26<01:05, 41.28it/s]

Ep 22300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 90%|████████▉ | 22409/25000 [08:28<00:54, 47.12it/s]

Ep 22400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 90%|█████████ | 22509/25000 [08:30<00:52, 47.60it/s]

Ep 22500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 0


 90%|█████████ | 22609/25000 [08:33<00:50, 47.67it/s]

Ep 22600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 91%|█████████ | 22709/25000 [08:35<00:49, 46.62it/s]

Ep 22700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 91%|█████████ | 22806/25000 [08:37<00:59, 36.92it/s]

Ep 22800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 8


 92%|█████████▏| 22905/25000 [08:40<00:53, 38.90it/s]

Ep 22900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 7


 92%|█████████▏| 23005/25000 [08:42<00:42, 46.87it/s]

Ep 23000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 92%|█████████▏| 23105/25000 [08:44<00:40, 46.30it/s]

Ep 23100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 8


 93%|█████████▎| 23205/25000 [08:46<00:37, 47.85it/s]

Ep 23200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 93%|█████████▎| 23305/25000 [08:48<00:35, 48.07it/s]

Ep 23300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 94%|█████████▎| 23404/25000 [08:51<00:43, 37.08it/s]

Ep 23400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 94%|█████████▍| 23505/25000 [08:54<00:39, 37.99it/s]

Ep 23500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 94%|█████████▍| 23605/25000 [08:56<00:28, 48.36it/s]

Ep 23600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 95%|█████████▍| 23706/25000 [08:58<00:28, 45.83it/s]

Ep 23700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 8


 95%|█████████▌| 23806/25000 [09:00<00:26, 45.76it/s]

Ep 23800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 96%|█████████▌| 23906/25000 [09:02<00:23, 47.55it/s]

Ep 23900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 96%|█████████▌| 24007/25000 [09:04<00:25, 38.76it/s]

Ep 24000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 96%|█████████▋| 24104/25000 [09:07<00:26, 33.68it/s]

Ep 24100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 97%|█████████▋| 24208/25000 [09:09<00:16, 47.03it/s]

Ep 24200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 97%|█████████▋| 24308/25000 [09:11<00:14, 47.59it/s]

Ep 24300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 98%|█████████▊| 24408/25000 [09:14<00:12, 46.90it/s]

Ep 24400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 4


 98%|█████████▊| 24508/25000 [09:16<00:10, 47.40it/s]

Ep 24500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 98%|█████████▊| 24606/25000 [09:18<00:10, 39.32it/s]

Ep 24600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 99%|█████████▉| 24703/25000 [09:21<00:08, 33.47it/s]

Ep 24700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 0


 99%|█████████▉| 24806/25000 [09:23<00:04, 47.08it/s]

Ep 24800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


100%|█████████▉| 24906/25000 [09:25<00:01, 47.67it/s]

Ep 24900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


100%|██████████| 25000/25000 [09:27<00:00, 44.04it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.4, First Action 0

TEST:


 42%|████▏     | 126/300 [00:00<00:01, 150.37it/s]

Ep 100/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 0


 74%|███████▍  | 222/300 [00:01<00:00, 150.84it/s]

Ep 200/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


100%|██████████| 300/300 [00:02<00:00, 149.37it/s]


Ep 300/300, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7

GAMMA 0.9 - LR 0.1 - Entropy Decay False


  0%|          | 106/25000 [00:02<11:33, 35.89it/s]

Ep 100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  1%|          | 206/25000 [00:05<11:55, 34.66it/s]

Ep 200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  1%|          | 307/25000 [00:07<08:53, 46.25it/s]

Ep 300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  2%|▏         | 407/25000 [00:09<08:39, 47.37it/s]

Ep 400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  2%|▏         | 507/25000 [00:11<08:37, 47.35it/s]

Ep 500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  2%|▏         | 607/25000 [00:13<08:29, 47.88it/s]

Ep 600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  3%|▎         | 704/25000 [00:16<10:43, 37.77it/s]

Ep 700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  3%|▎         | 806/25000 [00:18<11:53, 33.91it/s]

Ep 800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  4%|▎         | 906/25000 [00:21<08:28, 47.39it/s]

Ep 900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  4%|▍         | 1006/25000 [00:23<08:24, 47.56it/s]

Ep 1000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


  4%|▍         | 1106/25000 [00:25<08:18, 47.93it/s]

Ep 1100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


  5%|▍         | 1206/25000 [00:27<08:27, 46.92it/s]

Ep 1200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


  5%|▌         | 1304/25000 [00:29<10:29, 37.67it/s]

Ep 1300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  6%|▌         | 1403/25000 [00:32<11:20, 34.66it/s]

Ep 1400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


  6%|▌         | 1505/25000 [00:34<08:20, 46.92it/s]

Ep 1500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  6%|▋         | 1605/25000 [00:36<08:11, 47.59it/s]

Ep 1600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  7%|▋         | 1705/25000 [00:39<08:16, 46.95it/s]

Ep 1700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  7%|▋         | 1805/25000 [00:41<08:11, 47.16it/s]

Ep 1800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  8%|▊         | 1904/25000 [00:43<09:52, 38.98it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


  8%|▊         | 2004/25000 [00:46<11:18, 33.90it/s]

Ep 2000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  8%|▊         | 2106/25000 [00:48<08:12, 46.46it/s]

Ep 2100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


  9%|▉         | 2206/25000 [00:50<08:19, 45.68it/s]

Ep 2200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


  9%|▉         | 2306/25000 [00:52<08:34, 44.11it/s]

Ep 2300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 10%|▉         | 2407/25000 [00:54<08:03, 46.75it/s]

Ep 2400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 10%|█         | 2506/25000 [00:57<09:42, 38.58it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 10%|█         | 2606/25000 [00:59<10:47, 34.58it/s]

Ep 2600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 11%|█         | 2708/25000 [01:02<07:52, 47.14it/s]

Ep 2700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 11%|█         | 2808/25000 [01:04<07:41, 48.11it/s]

Ep 2800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 12%|█▏        | 2908/25000 [01:06<07:55, 46.47it/s]

Ep 2900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 12%|█▏        | 3008/25000 [01:08<07:52, 46.53it/s]

Ep 3000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 12%|█▏        | 3107/25000 [01:10<09:43, 37.54it/s]

Ep 3100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 13%|█▎        | 3204/25000 [01:13<11:04, 32.81it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 13%|█▎        | 3305/25000 [01:15<07:37, 47.39it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 14%|█▎        | 3405/25000 [01:17<07:29, 48.07it/s]

Ep 3400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 14%|█▍        | 3507/25000 [01:20<07:20, 48.78it/s]

Ep 3500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 14%|█▍        | 3607/25000 [01:22<07:27, 47.85it/s]

Ep 3600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 15%|█▍        | 3703/25000 [01:24<08:41, 40.87it/s]

Ep 3700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 15%|█▌        | 3803/25000 [01:27<10:14, 34.51it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 16%|█▌        | 3907/25000 [01:29<07:32, 46.66it/s]

Ep 3900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 16%|█▌        | 4007/25000 [01:31<07:29, 46.74it/s]

Ep 4000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 16%|█▋        | 4108/25000 [01:33<07:26, 46.77it/s]

Ep 4100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 17%|█▋        | 4208/25000 [01:36<07:31, 46.03it/s]

Ep 4200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 17%|█▋        | 4303/25000 [01:38<08:24, 41.01it/s]

Ep 4300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 18%|█▊        | 4404/25000 [01:41<10:05, 34.00it/s]

Ep 4400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 18%|█▊        | 4508/25000 [01:43<07:11, 47.49it/s]

Ep 4500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 18%|█▊        | 4608/25000 [01:45<07:26, 45.65it/s]

Ep 4600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 19%|█▉        | 4708/25000 [01:47<07:19, 46.13it/s]

Ep 4700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 19%|█▉        | 4808/25000 [01:49<07:09, 47.04it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 20%|█▉        | 4903/25000 [01:51<08:28, 39.52it/s]

Ep 4900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 20%|██        | 5005/25000 [01:54<09:44, 34.19it/s]

Ep 5000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 20%|██        | 5109/25000 [01:57<07:02, 47.12it/s]

Ep 5100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 21%|██        | 5209/25000 [01:59<06:56, 47.55it/s]

Ep 5200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 21%|██        | 5309/25000 [02:01<06:58, 47.10it/s]

Ep 5300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 22%|██▏       | 5409/25000 [02:03<06:50, 47.67it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 22%|██▏       | 5504/25000 [02:05<07:55, 41.00it/s]

Ep 5500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 22%|██▏       | 5606/25000 [02:08<09:38, 33.50it/s]

Ep 5600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 23%|██▎       | 5706/25000 [02:10<06:47, 47.29it/s]

Ep 5700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 23%|██▎       | 5806/25000 [02:13<06:41, 47.80it/s]

Ep 5800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 24%|██▎       | 5906/25000 [02:15<06:42, 47.47it/s]

Ep 5900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 24%|██▍       | 6006/25000 [02:17<06:44, 46.94it/s]

Ep 6000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 24%|██▍       | 6106/25000 [02:19<07:41, 40.94it/s]

Ep 6100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 25%|██▍       | 6205/25000 [02:22<09:14, 33.89it/s]

Ep 6200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 25%|██▌       | 6305/25000 [02:24<06:47, 45.92it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 26%|██▌       | 6405/25000 [02:26<06:42, 46.23it/s]

Ep 6400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 26%|██▌       | 6505/25000 [02:29<06:36, 46.62it/s]

Ep 6500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 26%|██▋       | 6605/25000 [02:31<06:45, 45.39it/s]

Ep 6600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 27%|██▋       | 6704/25000 [02:33<08:07, 37.56it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 27%|██▋       | 6807/25000 [02:36<08:49, 34.38it/s]

Ep 6800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 28%|██▊       | 6907/25000 [02:38<06:26, 46.85it/s]

Ep 6900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 28%|██▊       | 7007/25000 [02:40<06:21, 47.15it/s]

Ep 7000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 28%|██▊       | 7107/25000 [02:42<06:15, 47.67it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 29%|██▉       | 7207/25000 [02:44<06:15, 47.39it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 29%|██▉       | 7307/25000 [02:47<07:19, 40.28it/s]

Ep 7300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 2


 30%|██▉       | 7405/25000 [02:49<08:35, 34.11it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 30%|███       | 7509/25000 [02:52<06:11, 47.09it/s]

Ep 7500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 30%|███       | 7609/25000 [02:54<06:17, 46.09it/s]

Ep 7600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 31%|███       | 7705/25000 [02:56<06:17, 45.84it/s]

Ep 7700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 31%|███       | 7805/25000 [02:58<06:06, 46.86it/s]

Ep 7800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 32%|███▏      | 7905/25000 [03:00<06:56, 41.07it/s]

Ep 7900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 32%|███▏      | 8004/25000 [03:03<08:05, 35.01it/s]

Ep 8000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 32%|███▏      | 8106/25000 [03:05<06:12, 45.32it/s]

Ep 8100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 33%|███▎      | 8206/25000 [03:08<06:03, 46.25it/s]

Ep 8200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


 33%|███▎      | 8306/25000 [03:10<06:02, 46.11it/s]

Ep 8300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 34%|███▎      | 8406/25000 [03:12<05:51, 47.17it/s]

Ep 8400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 34%|███▍      | 8506/25000 [03:14<06:37, 41.53it/s]

Ep 8500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 34%|███▍      | 8604/25000 [03:17<07:47, 35.11it/s]

Ep 8600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 35%|███▍      | 8706/25000 [03:19<05:48, 46.72it/s]

Ep 8700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 35%|███▌      | 8806/25000 [03:21<05:45, 46.91it/s]

Ep 8800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


 36%|███▌      | 8906/25000 [03:23<05:39, 47.40it/s]

Ep 8900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 36%|███▌      | 9006/25000 [03:26<05:36, 47.57it/s]

Ep 9000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 36%|███▋      | 9106/25000 [03:28<05:36, 47.23it/s]

Ep 9100/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 37%|███▋      | 9206/25000 [03:31<07:39, 34.41it/s]

Ep 9200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 37%|███▋      | 9308/25000 [03:33<05:37, 46.47it/s]

Ep 9300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 38%|███▊      | 9408/25000 [03:35<05:34, 46.59it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 38%|███▊      | 9508/25000 [03:37<05:30, 46.90it/s]

Ep 9500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 38%|███▊      | 9608/25000 [03:40<05:27, 47.07it/s]

Ep 9600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 39%|███▉      | 9703/25000 [03:42<05:24, 47.08it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 39%|███▉      | 9806/25000 [03:44<07:19, 34.55it/s]

Ep 9800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 40%|███▉      | 9908/25000 [03:47<05:10, 48.65it/s]

Ep 9900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 40%|████      | 10008/25000 [03:49<05:23, 46.41it/s]

Ep 10000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 40%|████      | 10108/25000 [03:51<05:28, 45.37it/s]

Ep 10100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 41%|████      | 10208/25000 [03:53<05:18, 46.38it/s]

Ep 10200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 41%|████      | 10308/25000 [03:55<05:19, 45.93it/s]

Ep 10300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 42%|████▏     | 10404/25000 [03:58<06:51, 35.51it/s]

Ep 10400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 42%|████▏     | 10505/25000 [04:01<05:16, 45.79it/s]

Ep 10500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 42%|████▏     | 10605/25000 [04:03<05:05, 47.19it/s]

Ep 10600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 43%|████▎     | 10705/25000 [04:05<05:04, 46.97it/s]

Ep 10700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 43%|████▎     | 10805/25000 [04:07<05:08, 45.99it/s]

Ep 10800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 44%|████▎     | 10905/25000 [04:09<05:00, 46.94it/s]

Ep 10900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 2


 44%|████▍     | 11005/25000 [04:12<06:37, 35.18it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 44%|████▍     | 11109/25000 [04:14<05:01, 46.05it/s]

Ep 11100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 45%|████▍     | 11209/25000 [04:17<04:51, 47.38it/s]

Ep 11200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 45%|████▌     | 11309/25000 [04:19<04:49, 47.35it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 46%|████▌     | 11409/25000 [04:21<04:47, 47.33it/s]

Ep 11400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 46%|████▌     | 11509/25000 [04:23<04:53, 45.94it/s]

Ep 11500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 46%|████▋     | 11604/25000 [04:26<06:21, 35.14it/s]

Ep 11600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 47%|████▋     | 11709/25000 [04:28<04:44, 46.73it/s]

Ep 11700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 47%|████▋     | 11809/25000 [04:30<04:41, 46.86it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 48%|████▊     | 11909/25000 [04:33<04:38, 47.02it/s]

Ep 11900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 48%|████▊     | 12009/25000 [04:35<04:40, 46.29it/s]

Ep 12000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 48%|████▊     | 12104/25000 [04:37<04:37, 46.50it/s]

Ep 12100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 49%|████▉     | 12205/25000 [04:40<06:05, 35.02it/s]

Ep 12200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 49%|████▉     | 12305/25000 [04:42<04:36, 45.99it/s]

Ep 12300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 50%|████▉     | 12405/25000 [04:44<04:37, 45.39it/s]

Ep 12400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 50%|█████     | 12505/25000 [04:46<04:28, 46.50it/s]

Ep 12500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 50%|█████     | 12606/25000 [04:48<04:24, 46.87it/s]

Ep 12600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 51%|█████     | 12706/25000 [04:51<04:20, 47.25it/s]

Ep 12700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 51%|█████     | 12806/25000 [04:53<05:48, 35.04it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 52%|█████▏    | 12907/25000 [04:56<04:19, 46.59it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 52%|█████▏    | 13007/25000 [04:58<04:17, 46.65it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 52%|█████▏    | 13107/25000 [05:00<04:17, 46.25it/s]

Ep 13100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 53%|█████▎    | 13207/25000 [05:02<04:08, 47.50it/s]

Ep 13200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 53%|█████▎    | 13307/25000 [05:05<04:14, 45.88it/s]

Ep 13300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 54%|█████▎    | 13406/25000 [05:07<05:35, 34.55it/s]

Ep 13400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13505/25000 [05:10<03:59, 48.08it/s]

Ep 13500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 54%|█████▍    | 13605/25000 [05:12<04:04, 46.67it/s]

Ep 13600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 55%|█████▍    | 13705/25000 [05:14<04:00, 46.92it/s]

Ep 13700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 55%|█████▌    | 13805/25000 [05:16<04:03, 46.01it/s]

Ep 13800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 56%|█████▌    | 13905/25000 [05:18<04:01, 45.85it/s]

Ep 13900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 56%|█████▌    | 14007/25000 [05:21<05:15, 34.87it/s]

Ep 14000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 56%|█████▋    | 14109/25000 [05:24<03:58, 45.72it/s]

Ep 14100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 57%|█████▋    | 14209/25000 [05:26<03:56, 45.68it/s]

Ep 14200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 57%|█████▋    | 14309/25000 [05:28<03:48, 46.84it/s]

Ep 14300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 58%|█████▊    | 14404/25000 [05:30<03:51, 45.82it/s]

Ep 14400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 58%|█████▊    | 14504/25000 [05:32<03:59, 43.78it/s]

Ep 14500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 58%|█████▊    | 14606/25000 [05:35<05:10, 33.53it/s]

Ep 14600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 59%|█████▉    | 14707/25000 [05:37<03:38, 47.04it/s]

Ep 14700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 59%|█████▉    | 14807/25000 [05:39<03:34, 47.42it/s]

Ep 14800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 60%|█████▉    | 14907/25000 [05:42<03:30, 47.84it/s]

Ep 14900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 60%|██████    | 15007/25000 [05:44<03:34, 46.50it/s]

Ep 15000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 60%|██████    | 15102/25000 [05:46<03:37, 45.43it/s]

Ep 15100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 61%|██████    | 15205/25000 [05:49<04:38, 35.17it/s]

Ep 15200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 61%|██████    | 15308/25000 [05:51<03:27, 46.74it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 62%|██████▏   | 15408/25000 [05:53<03:23, 47.12it/s]

Ep 15400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 62%|██████▏   | 15508/25000 [05:56<03:26, 46.03it/s]

Ep 15500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 62%|██████▏   | 15608/25000 [05:58<03:23, 46.25it/s]

Ep 15600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 63%|██████▎   | 15703/25000 [06:00<03:52, 39.99it/s]

Ep 15700/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 63%|██████▎   | 15804/25000 [06:03<04:20, 35.35it/s]

Ep 15800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 64%|██████▎   | 15907/25000 [06:05<03:17, 46.14it/s]

Ep 15900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 64%|██████▍   | 16007/25000 [06:07<03:13, 46.45it/s]

Ep 16000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 64%|██████▍   | 16107/25000 [06:09<03:08, 47.06it/s]

Ep 16100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 65%|██████▍   | 16207/25000 [06:12<03:08, 46.74it/s]

Ep 16200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 65%|██████▌   | 16307/25000 [06:14<03:30, 41.21it/s]

Ep 16300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 66%|██████▌   | 16406/25000 [06:17<04:18, 33.26it/s]

Ep 16400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 66%|██████▌   | 16505/25000 [06:19<02:59, 47.31it/s]

Ep 16500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 66%|██████▋   | 16605/25000 [06:21<03:03, 45.74it/s]

Ep 16600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 67%|██████▋   | 16705/25000 [06:23<02:59, 46.19it/s]

Ep 16700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 67%|██████▋   | 16805/25000 [06:25<02:52, 47.46it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 68%|██████▊   | 16905/25000 [06:28<03:25, 39.31it/s]

Ep 16900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 68%|██████▊   | 17005/25000 [06:30<03:55, 34.00it/s]

Ep 17000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 68%|██████▊   | 17109/25000 [06:33<02:48, 46.78it/s]

Ep 17100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 69%|██████▉   | 17204/25000 [06:35<02:47, 46.65it/s]

Ep 17200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 69%|██████▉   | 17309/25000 [06:37<02:47, 45.98it/s]

Ep 17300/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 2


 70%|██████▉   | 17409/25000 [06:39<02:41, 46.92it/s]

Ep 17400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 70%|███████   | 17504/25000 [06:41<03:15, 38.25it/s]

Ep 17500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 70%|███████   | 17604/25000 [06:44<03:39, 33.71it/s]

Ep 17600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 2


 71%|███████   | 17708/25000 [06:47<02:35, 46.81it/s]

Ep 17700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 71%|███████   | 17808/25000 [06:49<02:33, 46.84it/s]

Ep 17800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 72%|███████▏  | 17908/25000 [06:51<02:32, 46.48it/s]

Ep 17900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 72%|███████▏  | 18008/25000 [06:53<02:28, 47.14it/s]

Ep 18000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 72%|███████▏  | 18103/25000 [06:55<02:52, 39.98it/s]

Ep 18100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 73%|███████▎  | 18205/25000 [06:58<03:17, 34.33it/s]

Ep 18200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 73%|███████▎  | 18309/25000 [07:01<02:23, 46.58it/s]

Ep 18300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 74%|███████▎  | 18409/25000 [07:03<02:21, 46.60it/s]

Ep 18400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 74%|███████▍  | 18504/25000 [07:05<02:20, 46.40it/s]

Ep 18500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 74%|███████▍  | 18609/25000 [07:07<02:19, 45.67it/s]

Ep 18600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 75%|███████▍  | 18704/25000 [07:09<02:41, 38.92it/s]

Ep 18700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 75%|███████▌  | 18805/25000 [07:12<03:10, 32.51it/s]

Ep 18800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


 76%|███████▌  | 18905/25000 [07:15<02:11, 46.20it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 76%|███████▌  | 19005/25000 [07:17<02:07, 46.95it/s]

Ep 19000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 76%|███████▋  | 19105/25000 [07:19<02:05, 46.98it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 77%|███████▋  | 19205/25000 [07:21<02:06, 45.80it/s]

Ep 19200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 77%|███████▋  | 19304/25000 [07:23<02:29, 38.16it/s]

Ep 19300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 78%|███████▊  | 19404/25000 [07:26<02:52, 32.48it/s]

Ep 19400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 78%|███████▊  | 19508/25000 [07:29<01:58, 46.15it/s]

Ep 19500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 78%|███████▊  | 19608/25000 [07:31<01:58, 45.36it/s]

Ep 19600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 79%|███████▉  | 19708/25000 [07:33<01:58, 44.56it/s]

Ep 19700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 79%|███████▉  | 19808/25000 [07:35<01:57, 44.18it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 80%|███████▉  | 19906/25000 [07:38<02:22, 35.77it/s]

Ep 19900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 80%|████████  | 20009/25000 [07:41<01:59, 41.71it/s]

Ep 20000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 80%|████████  | 20109/25000 [07:43<01:45, 46.35it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 81%|████████  | 20204/25000 [07:45<01:41, 47.09it/s]

Ep 20200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 81%|████████  | 20309/25000 [07:47<01:41, 46.30it/s]

Ep 20300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 82%|████████▏ | 20409/25000 [07:49<01:43, 44.55it/s]

Ep 20400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 82%|████████▏ | 20505/25000 [07:52<01:57, 38.41it/s]

Ep 20500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 2


 82%|████████▏ | 20604/25000 [07:55<01:50, 39.76it/s]

Ep 20600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 83%|████████▎ | 20708/25000 [07:57<01:35, 45.11it/s]

Ep 20700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 83%|████████▎ | 20808/25000 [07:59<01:32, 45.37it/s]

Ep 20800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 84%|████████▎ | 20908/25000 [08:01<01:31, 44.54it/s]

Ep 20900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 84%|████████▍ | 21008/25000 [08:04<01:25, 46.65it/s]

Ep 21000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 2


 84%|████████▍ | 21103/25000 [08:06<01:48, 35.95it/s]

Ep 21100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 85%|████████▍ | 21209/25000 [08:09<01:24, 45.11it/s]

Ep 21200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 85%|████████▌ | 21309/25000 [08:11<01:19, 46.43it/s]

Ep 21300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 86%|████████▌ | 21409/25000 [08:13<01:17, 46.18it/s]

Ep 21400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 86%|████████▌ | 21509/25000 [08:15<01:16, 45.91it/s]

Ep 21500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 86%|████████▋ | 21609/25000 [08:18<01:15, 45.05it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 87%|████████▋ | 21705/25000 [08:20<01:33, 35.08it/s]

Ep 21700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 87%|████████▋ | 21806/25000 [08:23<01:12, 44.07it/s]

Ep 21800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 88%|████████▊ | 21906/25000 [08:25<01:08, 44.87it/s]

Ep 21900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 2


 88%|████████▊ | 22006/25000 [08:27<01:04, 46.24it/s]

Ep 22000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 88%|████████▊ | 22106/25000 [08:29<01:03, 45.82it/s]

Ep 22100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 89%|████████▉ | 22206/25000 [08:32<01:00, 46.31it/s]

Ep 22200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 89%|████████▉ | 22304/25000 [08:34<01:16, 35.42it/s]

Ep 22300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 90%|████████▉ | 22405/25000 [08:37<00:58, 44.43it/s]

Ep 22400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 90%|█████████ | 22505/25000 [08:39<00:54, 45.51it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 90%|█████████ | 22605/25000 [08:41<00:50, 47.17it/s]

Ep 22600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 2


 91%|█████████ | 22705/25000 [08:43<00:50, 45.30it/s]

Ep 22700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 91%|█████████ | 22805/25000 [08:46<00:47, 46.28it/s]

Ep 22800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 92%|█████████▏| 22906/25000 [08:48<00:55, 37.52it/s]

Ep 22900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 92%|█████████▏| 23005/25000 [08:51<00:44, 44.52it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 92%|█████████▏| 23105/25000 [08:53<00:42, 44.93it/s]

Ep 23100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 93%|█████████▎| 23205/25000 [08:55<00:38, 46.77it/s]

Ep 23200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 93%|█████████▎| 23305/25000 [08:57<00:36, 46.63it/s]

Ep 23300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 94%|█████████▎| 23405/25000 [09:00<00:33, 46.94it/s]

Ep 23400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 94%|█████████▍| 23504/25000 [09:02<00:41, 35.89it/s]

Ep 23500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 94%|█████████▍| 23609/25000 [09:05<00:30, 44.98it/s]

Ep 23600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 95%|█████████▍| 23709/25000 [09:07<00:27, 46.42it/s]

Ep 23700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 95%|█████████▌| 23809/25000 [09:09<00:25, 46.40it/s]

Ep 23800/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 2


 96%|█████████▌| 23909/25000 [09:12<00:23, 46.03it/s]

Ep 23900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 2


 96%|█████████▌| 24009/25000 [09:14<00:21, 45.77it/s]

Ep 24000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 96%|█████████▋| 24107/25000 [09:16<00:24, 36.76it/s]

Ep 24100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 97%|█████████▋| 24208/25000 [09:19<00:17, 44.95it/s]

Ep 24200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 97%|█████████▋| 24308/25000 [09:21<00:15, 45.56it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 98%|█████████▊| 24408/25000 [09:23<00:12, 45.97it/s]

Ep 24400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 98%|█████████▊| 24508/25000 [09:25<00:10, 46.76it/s]

Ep 24500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 98%|█████████▊| 24608/25000 [09:28<00:08, 47.30it/s]

Ep 24600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 2


 99%|█████████▉| 24705/25000 [09:30<00:08, 35.52it/s]

Ep 24700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 99%|█████████▉| 24806/25000 [09:33<00:04, 46.62it/s]

Ep 24800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


100%|█████████▉| 24906/25000 [09:35<00:02, 44.99it/s]

Ep 24900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


100%|██████████| 25000/25000 [09:37<00:00, 43.28it/s]


Ep 25000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2

TEST:


 41%|████      | 122/300 [00:00<00:01, 145.52it/s]

Ep 100/300, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 2


 76%|███████▌  | 228/300 [00:01<00:00, 146.68it/s]

Ep 200/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


100%|██████████| 300/300 [00:02<00:00, 145.21it/s]


Ep 300/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2

GAMMA 0.8 - LR 0.1 - Entropy Decay False


  0%|          | 105/25000 [00:02<09:02, 45.89it/s]

Ep 100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 3


  1%|          | 206/25000 [00:05<12:31, 32.98it/s]

Ep 200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 1


  1%|          | 306/25000 [00:07<08:45, 46.97it/s]

Ep 300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 1


  2%|▏         | 406/25000 [00:09<09:01, 45.44it/s]

Ep 400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 9


  2%|▏         | 506/25000 [00:11<08:50, 46.17it/s]

Ep 500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  2%|▏         | 606/25000 [00:14<08:43, 46.59it/s]

Ep 600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


  3%|▎         | 706/25000 [00:16<08:37, 46.96it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  3%|▎         | 803/25000 [00:18<10:50, 37.18it/s]

Ep 800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 9


  4%|▎         | 907/25000 [00:21<08:37, 46.52it/s]

Ep 900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


  4%|▍         | 1007/25000 [00:23<08:41, 45.98it/s]

Ep 1000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


  4%|▍         | 1107/25000 [00:25<08:31, 46.67it/s]

Ep 1100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


  5%|▍         | 1207/25000 [00:28<08:31, 46.50it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  5%|▌         | 1307/25000 [00:30<08:15, 47.82it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  6%|▌         | 1403/25000 [00:32<10:57, 35.88it/s]

Ep 1400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  6%|▌         | 1507/25000 [00:35<08:14, 47.54it/s]

Ep 1500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  6%|▋         | 1607/25000 [00:37<08:22, 46.59it/s]

Ep 1600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  7%|▋         | 1707/25000 [00:39<08:37, 45.03it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  7%|▋         | 1808/25000 [00:41<08:11, 47.19it/s]

Ep 1800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 5


  8%|▊         | 1908/25000 [00:43<08:25, 45.67it/s]

Ep 1900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  8%|▊         | 2003/25000 [00:46<11:35, 33.06it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  8%|▊         | 2109/25000 [00:49<08:27, 45.08it/s]

Ep 2100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


  9%|▉         | 2209/25000 [00:51<08:05, 46.95it/s]

Ep 2200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


  9%|▉         | 2309/25000 [00:53<08:09, 46.38it/s]

Ep 2300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 10%|▉         | 2409/25000 [00:55<08:07, 46.35it/s]

Ep 2400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 10%|█         | 2504/25000 [00:57<08:54, 42.06it/s]

Ep 2500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 10%|█         | 2604/25000 [01:00<10:29, 35.56it/s]

Ep 2600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 11%|█         | 2706/25000 [01:03<08:05, 45.87it/s]

Ep 2700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 11%|█         | 2806/25000 [01:05<07:52, 46.98it/s]

Ep 2800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 12%|█▏        | 2906/25000 [01:07<07:44, 47.54it/s]

Ep 2900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 12%|█▏        | 3006/25000 [01:09<07:57, 46.05it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 5


 12%|█▏        | 3106/25000 [01:11<09:24, 38.81it/s]

Ep 3100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 13%|█▎        | 3206/25000 [01:14<11:06, 32.69it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 13%|█▎        | 3304/25000 [01:17<08:11, 44.10it/s]

Ep 3300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 14%|█▎        | 3404/25000 [01:19<08:01, 44.88it/s]

Ep 3400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 14%|█▍        | 3504/25000 [01:21<08:00, 44.73it/s]

Ep 3500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 14%|█▍        | 3609/25000 [01:23<07:49, 45.54it/s]

Ep 3600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 15%|█▍        | 3705/25000 [01:26<09:38, 36.83it/s]

Ep 3700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 15%|█▌        | 3805/25000 [01:29<10:37, 33.23it/s]

Ep 3800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 16%|█▌        | 3906/25000 [01:31<07:39, 45.95it/s]

Ep 3900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 16%|█▌        | 4006/25000 [01:33<07:53, 44.30it/s]

Ep 4000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 7


 16%|█▋        | 4106/25000 [01:35<07:41, 45.30it/s]

Ep 4100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 17%|█▋        | 4206/25000 [01:38<07:29, 46.21it/s]

Ep 4200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 7


 17%|█▋        | 4306/25000 [01:40<09:03, 38.06it/s]

Ep 4300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 18%|█▊        | 4406/25000 [01:43<10:04, 34.06it/s]

Ep 4400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 18%|█▊        | 4505/25000 [01:45<07:22, 46.34it/s]

Ep 4500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 18%|█▊        | 4605/25000 [01:47<07:22, 46.08it/s]

Ep 4600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 19%|█▉        | 4705/25000 [01:49<07:27, 45.35it/s]

Ep 4700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 7


 19%|█▉        | 4805/25000 [01:52<07:10, 46.86it/s]

Ep 4800/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 20%|█▉        | 4905/25000 [01:54<09:11, 36.46it/s]

Ep 4900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 20%|██        | 5005/25000 [01:57<09:31, 34.98it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 20%|██        | 5107/25000 [01:59<07:01, 47.17it/s]

Ep 5100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 21%|██        | 5207/25000 [02:01<07:13, 45.70it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 21%|██        | 5307/25000 [02:03<07:02, 46.61it/s]

Ep 5300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 22%|██▏       | 5407/25000 [02:06<07:04, 46.15it/s]

Ep 5400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 22%|██▏       | 5504/25000 [02:08<08:32, 38.01it/s]

Ep 5500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 22%|██▏       | 5606/25000 [02:11<09:23, 34.45it/s]

Ep 5600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 23%|██▎       | 5708/25000 [02:13<06:54, 46.51it/s]

Ep 5700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 23%|██▎       | 5808/25000 [02:15<06:56, 46.11it/s]

Ep 5800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 24%|██▎       | 5908/25000 [02:17<07:08, 44.51it/s]

Ep 5900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 24%|██▍       | 6004/25000 [02:19<06:51, 46.16it/s]

Ep 6000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 24%|██▍       | 6104/25000 [02:22<08:21, 37.71it/s]

Ep 6100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 25%|██▍       | 6204/25000 [02:25<09:06, 34.42it/s]

Ep 6200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 7


 25%|██▌       | 6306/25000 [02:27<06:35, 47.30it/s]

Ep 6300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 26%|██▌       | 6406/25000 [02:29<06:45, 45.85it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 26%|██▌       | 6506/25000 [02:31<06:42, 45.90it/s]

Ep 6500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 26%|██▋       | 6606/25000 [02:33<06:44, 45.45it/s]

Ep 6600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 27%|██▋       | 6707/25000 [02:36<08:09, 37.38it/s]

Ep 6700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 27%|██▋       | 6806/25000 [02:39<09:04, 33.44it/s]

Ep 6800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 28%|██▊       | 6909/25000 [02:41<06:30, 46.29it/s]

Ep 6900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 28%|██▊       | 7009/25000 [02:43<06:23, 46.86it/s]

Ep 7000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 28%|██▊       | 7109/25000 [02:45<06:33, 45.50it/s]

Ep 7100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 29%|██▉       | 7209/25000 [02:47<06:30, 45.62it/s]

Ep 7200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 29%|██▉       | 7305/25000 [02:50<08:06, 36.34it/s]

Ep 7300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 30%|██▉       | 7406/25000 [02:52<08:34, 34.22it/s]

Ep 7400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 30%|███       | 7508/25000 [02:55<06:14, 46.67it/s]

Ep 7500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 30%|███       | 7608/25000 [02:57<06:15, 46.37it/s]

Ep 7600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 31%|███       | 7708/25000 [02:59<06:30, 44.30it/s]

Ep 7700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 31%|███       | 7808/25000 [03:01<06:26, 44.51it/s]

Ep 7800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 32%|███▏      | 7904/25000 [03:04<08:04, 35.31it/s]

Ep 7900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 32%|███▏      | 8004/25000 [03:07<09:17, 30.49it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 32%|███▏      | 8108/25000 [03:09<06:07, 46.00it/s]

Ep 8100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 33%|███▎      | 8208/25000 [03:11<06:10, 45.33it/s]

Ep 8200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 33%|███▎      | 8308/25000 [03:13<06:03, 45.97it/s]

Ep 8300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 34%|███▎      | 8408/25000 [03:15<06:00, 45.97it/s]

Ep 8400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 34%|███▍      | 8507/25000 [03:18<07:16, 37.76it/s]

Ep 8500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 34%|███▍      | 8604/25000 [03:21<07:48, 35.00it/s]

Ep 8600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 35%|███▍      | 8708/25000 [03:23<05:48, 46.74it/s]

Ep 8700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 35%|███▌      | 8808/25000 [03:25<05:58, 45.12it/s]

Ep 8800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 36%|███▌      | 8908/25000 [03:27<06:02, 44.45it/s]

Ep 8900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 36%|███▌      | 9008/25000 [03:29<05:42, 46.65it/s]

Ep 9000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 36%|███▋      | 9107/25000 [03:32<07:04, 37.43it/s]

Ep 9100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 37%|███▋      | 9208/25000 [03:35<07:07, 36.93it/s]

Ep 9200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 37%|███▋      | 9308/25000 [03:37<05:46, 45.23it/s]

Ep 9300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 38%|███▊      | 9408/25000 [03:39<05:48, 44.78it/s]

Ep 9400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 38%|███▊      | 9508/25000 [03:41<05:42, 45.21it/s]

Ep 9500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 38%|███▊      | 9608/25000 [03:44<05:34, 45.95it/s]

Ep 9600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 39%|███▉      | 9704/25000 [03:46<06:39, 38.28it/s]

Ep 9700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 39%|███▉      | 9808/25000 [03:49<06:10, 41.00it/s]

Ep 9800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 40%|███▉      | 9908/25000 [03:51<05:34, 45.13it/s]

Ep 9900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 40%|████      | 10008/25000 [03:53<05:27, 45.80it/s]

Ep 10000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 40%|████      | 10108/25000 [03:55<05:28, 45.28it/s]

Ep 10100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 41%|████      | 10208/25000 [03:58<05:14, 46.99it/s]

Ep 10200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 41%|████      | 10304/25000 [04:00<06:22, 38.45it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 42%|████▏     | 10409/25000 [04:03<05:32, 43.89it/s]

Ep 10400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 42%|████▏     | 10509/25000 [04:05<05:24, 44.67it/s]

Ep 10500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 42%|████▏     | 10609/25000 [04:07<05:19, 45.01it/s]

Ep 10600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 43%|████▎     | 10709/25000 [04:09<05:07, 46.47it/s]

Ep 10700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 43%|████▎     | 10809/25000 [04:12<05:06, 46.30it/s]

Ep 10800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 7


 44%|████▎     | 10906/25000 [04:14<06:45, 34.72it/s]

Ep 10900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 44%|████▍     | 11009/25000 [04:17<05:11, 44.92it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 44%|████▍     | 11109/25000 [04:19<05:06, 45.27it/s]

Ep 11100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 7


 45%|████▍     | 11209/25000 [04:21<05:14, 43.83it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 45%|████▌     | 11309/25000 [04:24<04:59, 45.68it/s]

Ep 11300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 46%|████▌     | 11409/25000 [04:26<04:55, 45.93it/s]

Ep 11400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 46%|████▌     | 11505/25000 [04:28<06:12, 36.25it/s]

Ep 11500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 46%|████▋     | 11608/25000 [04:31<04:52, 45.72it/s]

Ep 11600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 47%|████▋     | 11708/25000 [04:33<04:47, 46.27it/s]

Ep 11700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 47%|████▋     | 11808/25000 [04:35<04:41, 46.90it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 48%|████▊     | 11908/25000 [04:37<04:37, 47.12it/s]

Ep 11900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 48%|████▊     | 12008/25000 [04:40<04:38, 46.72it/s]

Ep 12000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 7


 48%|████▊     | 12106/25000 [04:42<06:08, 34.96it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 49%|████▉     | 12205/25000 [04:45<04:47, 44.52it/s]

Ep 12200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 49%|████▉     | 12305/25000 [04:47<04:29, 47.11it/s]

Ep 12300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 50%|████▉     | 12405/25000 [04:49<04:33, 46.02it/s]

Ep 12400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


 50%|█████     | 12505/25000 [04:51<04:52, 42.68it/s]

Ep 12500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 50%|█████     | 12608/25000 [04:54<05:04, 40.71it/s]

Ep 12600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 51%|█████     | 12707/25000 [04:56<05:51, 34.96it/s]

Ep 12700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 51%|█████     | 12807/25000 [04:59<04:32, 44.68it/s]

Ep 12800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 52%|█████▏    | 12907/25000 [05:01<04:22, 46.12it/s]

Ep 12900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 52%|█████▏    | 13007/25000 [05:03<04:20, 46.00it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 52%|█████▏    | 13107/25000 [05:06<04:16, 46.32it/s]

Ep 13100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 53%|█████▎    | 13207/25000 [05:08<04:08, 47.48it/s]

Ep 13200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 53%|█████▎    | 13306/25000 [05:10<05:16, 36.97it/s]

Ep 13300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 54%|█████▎    | 13406/25000 [05:13<04:17, 45.07it/s]

Ep 13400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 54%|█████▍    | 13506/25000 [05:15<04:04, 47.04it/s]

Ep 13500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 54%|█████▍    | 13606/25000 [05:17<04:06, 46.27it/s]

Ep 13600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 55%|█████▍    | 13706/25000 [05:19<04:05, 46.10it/s]

Ep 13700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 55%|█████▌    | 13806/25000 [05:22<04:27, 41.82it/s]

Ep 13800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 56%|█████▌    | 13906/25000 [05:24<05:20, 34.62it/s]

Ep 13900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 56%|█████▌    | 14005/25000 [05:27<04:06, 44.69it/s]

Ep 14000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 56%|█████▋    | 14105/25000 [05:29<03:55, 46.27it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 57%|█████▋    | 14205/25000 [05:31<03:58, 45.23it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 57%|█████▋    | 14305/25000 [05:33<03:54, 45.59it/s]

Ep 14300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 58%|█████▊    | 14405/25000 [05:36<03:49, 46.16it/s]

Ep 14400/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 58%|█████▊    | 14504/25000 [05:38<04:42, 37.15it/s]

Ep 14500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 58%|█████▊    | 14608/25000 [05:41<03:46, 45.98it/s]

Ep 14600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 59%|█████▉    | 14708/25000 [05:43<03:47, 45.22it/s]

Ep 14700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 59%|█████▉    | 14808/25000 [05:45<03:45, 45.28it/s]

Ep 14800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 60%|█████▉    | 14908/25000 [05:48<03:40, 45.69it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 60%|██████    | 15008/25000 [05:50<03:36, 46.17it/s]

Ep 15000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 60%|██████    | 15106/25000 [05:53<04:26, 37.17it/s]

Ep 15100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 61%|██████    | 15208/25000 [05:55<03:32, 45.98it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 61%|██████    | 15308/25000 [05:57<03:31, 45.80it/s]

Ep 15300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 62%|██████▏   | 15408/25000 [05:59<03:27, 46.23it/s]

Ep 15400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 62%|██████▏   | 15508/25000 [06:02<03:26, 46.06it/s]

Ep 15500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 62%|██████▏   | 15608/25000 [06:04<03:17, 47.48it/s]

Ep 15600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 63%|██████▎   | 15706/25000 [06:06<04:04, 37.95it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 63%|██████▎   | 15808/25000 [06:09<03:23, 45.17it/s]

Ep 15800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 64%|██████▎   | 15908/25000 [06:11<03:17, 46.06it/s]

Ep 15900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 64%|██████▍   | 16008/25000 [06:13<03:15, 46.04it/s]

Ep 16000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 64%|██████▍   | 16108/25000 [06:16<03:14, 45.68it/s]

Ep 16100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 65%|██████▍   | 16208/25000 [06:18<03:10, 46.25it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 65%|██████▌   | 16305/25000 [06:20<03:52, 37.34it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 66%|██████▌   | 16407/25000 [06:23<03:13, 44.34it/s]

Ep 16400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 66%|██████▌   | 16507/25000 [06:25<03:03, 46.18it/s]

Ep 16500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 66%|██████▋   | 16607/25000 [06:27<03:02, 45.94it/s]

Ep 16600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 67%|██████▋   | 16707/25000 [06:30<03:01, 45.73it/s]

Ep 16700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 67%|██████▋   | 16807/25000 [06:32<02:56, 46.54it/s]

Ep 16800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 68%|██████▊   | 16904/25000 [06:35<03:50, 35.06it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 68%|██████▊   | 17009/25000 [06:37<02:56, 45.40it/s]

Ep 17000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 68%|██████▊   | 17109/25000 [06:39<02:50, 46.26it/s]

Ep 17100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 69%|██████▉   | 17209/25000 [06:42<02:53, 44.83it/s]

Ep 17200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 69%|██████▉   | 17309/25000 [06:44<02:45, 46.47it/s]

Ep 17300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 7


 70%|██████▉   | 17404/25000 [06:46<02:55, 43.34it/s]

Ep 17400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7


 70%|███████   | 17506/25000 [06:49<03:40, 33.97it/s]

Ep 17500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 70%|███████   | 17607/25000 [06:51<02:42, 45.43it/s]

Ep 17600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 71%|███████   | 17707/25000 [06:53<02:38, 45.91it/s]

Ep 17700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 71%|███████   | 17807/25000 [06:56<02:35, 46.24it/s]

Ep 17800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 72%|███████▏  | 17907/25000 [06:58<02:32, 46.42it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 72%|███████▏  | 18007/25000 [07:00<02:50, 41.07it/s]

Ep 18000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 72%|███████▏  | 18104/25000 [07:03<03:18, 34.73it/s]

Ep 18100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 73%|███████▎  | 18207/25000 [07:05<02:29, 45.37it/s]

Ep 18200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 73%|███████▎  | 18307/25000 [07:07<02:23, 46.51it/s]

Ep 18300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 74%|███████▎  | 18407/25000 [07:10<02:26, 45.14it/s]

Ep 18400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 74%|███████▍  | 18507/25000 [07:12<02:20, 46.09it/s]

Ep 18500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 7


 74%|███████▍  | 18607/25000 [07:14<02:43, 38.98it/s]

Ep 18600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 75%|███████▍  | 18703/25000 [07:17<03:03, 34.40it/s]

Ep 18700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 75%|███████▌  | 18809/25000 [07:19<02:14, 46.00it/s]

Ep 18800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 76%|███████▌  | 18909/25000 [07:21<02:09, 46.90it/s]

Ep 18900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 76%|███████▌  | 19009/25000 [07:24<02:11, 45.73it/s]

Ep 19000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 76%|███████▋  | 19109/25000 [07:26<02:07, 46.09it/s]

Ep 19100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 77%|███████▋  | 19204/25000 [07:28<02:18, 41.86it/s]

Ep 19200/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 77%|███████▋  | 19307/25000 [07:31<02:45, 34.40it/s]

Ep 19300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 78%|███████▊  | 19405/25000 [07:33<02:01, 46.23it/s]

Ep 19400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 78%|███████▊  | 19505/25000 [07:35<01:59, 45.83it/s]

Ep 19500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 78%|███████▊  | 19605/25000 [07:37<01:55, 46.85it/s]

Ep 19600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 79%|███████▉  | 19705/25000 [07:40<01:52, 46.93it/s]

Ep 19700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 7


 79%|███████▉  | 19805/25000 [07:42<02:05, 41.33it/s]

Ep 19800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 80%|███████▉  | 19906/25000 [07:45<02:26, 34.78it/s]

Ep 19900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 80%|████████  | 20005/25000 [07:47<01:45, 47.20it/s]

Ep 20000/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 7


 80%|████████  | 20105/25000 [07:49<01:50, 44.33it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 7


 81%|████████  | 20205/25000 [07:51<01:45, 45.60it/s]

Ep 20200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 81%|████████  | 20305/25000 [07:54<01:44, 45.13it/s]

Ep 20300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 82%|████████▏ | 20405/25000 [07:56<01:58, 38.84it/s]

Ep 20400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 82%|████████▏ | 20503/25000 [07:59<02:14, 33.39it/s]

Ep 20500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 82%|████████▏ | 20606/25000 [08:01<01:33, 46.91it/s]

Ep 20600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 83%|████████▎ | 20706/25000 [08:03<01:35, 45.13it/s]

Ep 20700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 83%|████████▎ | 20806/25000 [08:05<01:32, 45.56it/s]

Ep 20800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 84%|████████▎ | 20906/25000 [08:08<01:26, 47.24it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 84%|████████▍ | 21005/25000 [08:10<01:47, 37.07it/s]

Ep 21000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 84%|████████▍ | 21106/25000 [08:13<01:58, 32.79it/s]

Ep 21100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 85%|████████▍ | 21209/25000 [08:15<01:20, 47.12it/s]

Ep 21200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 85%|████████▌ | 21309/25000 [08:17<01:25, 43.21it/s]

Ep 21300/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 86%|████████▌ | 21409/25000 [08:20<01:20, 44.82it/s]

Ep 21400/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 86%|████████▌ | 21509/25000 [08:22<01:16, 45.76it/s]

Ep 21500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 86%|████████▋ | 21605/25000 [08:24<01:33, 36.26it/s]

Ep 21600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 87%|████████▋ | 21705/25000 [08:27<01:39, 33.12it/s]

Ep 21700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 87%|████████▋ | 21805/25000 [08:29<01:10, 45.33it/s]

Ep 21800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 88%|████████▊ | 21905/25000 [08:31<01:06, 46.45it/s]

Ep 21900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 88%|████████▊ | 22005/25000 [08:34<01:04, 46.69it/s]

Ep 22000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 88%|████████▊ | 22105/25000 [08:36<01:01, 46.99it/s]

Ep 22100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 7


 89%|████████▉ | 22204/25000 [08:38<01:12, 38.81it/s]

Ep 22200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 89%|████████▉ | 22305/25000 [08:41<01:17, 34.56it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 90%|████████▉ | 22405/25000 [08:43<00:59, 43.38it/s]

Ep 22400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 90%|█████████ | 22505/25000 [08:45<00:54, 45.61it/s]

Ep 22500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 90%|█████████ | 22605/25000 [08:48<00:51, 46.36it/s]

Ep 22600/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 91%|█████████ | 22705/25000 [08:50<00:49, 46.22it/s]

Ep 22700/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 7


 91%|█████████ | 22807/25000 [08:52<01:00, 36.49it/s]

Ep 22800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 92%|█████████▏| 22905/25000 [08:55<01:01, 34.08it/s]

Ep 22900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 92%|█████████▏| 23006/25000 [08:57<00:43, 46.09it/s]

Ep 23000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 7


 92%|█████████▏| 23106/25000 [08:59<00:40, 46.52it/s]

Ep 23100/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 7


 93%|█████████▎| 23206/25000 [09:02<00:38, 46.75it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 93%|█████████▎| 23306/25000 [09:04<00:36, 46.96it/s]

Ep 23300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 94%|█████████▎| 23404/25000 [09:06<00:42, 37.75it/s]

Ep 23400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 94%|█████████▍| 23506/25000 [09:09<00:44, 33.36it/s]

Ep 23500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 94%|█████████▍| 23608/25000 [09:11<00:31, 44.64it/s]

Ep 23600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 95%|█████████▍| 23708/25000 [09:13<00:28, 45.45it/s]

Ep 23700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 95%|█████████▌| 23808/25000 [09:16<00:25, 46.07it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 7


 96%|█████████▌| 23908/25000 [09:18<00:23, 46.13it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 96%|█████████▌| 24005/25000 [09:20<00:26, 37.23it/s]

Ep 24000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 96%|█████████▋| 24105/25000 [09:23<00:26, 33.49it/s]

Ep 24100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 97%|█████████▋| 24208/25000 [09:25<00:17, 45.10it/s]

Ep 24200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 7


 97%|█████████▋| 24308/25000 [09:27<00:14, 46.16it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 98%|█████████▊| 24408/25000 [09:30<00:12, 46.72it/s]

Ep 24400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 98%|█████████▊| 24508/25000 [09:32<00:10, 46.03it/s]

Ep 24500/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 7


 98%|█████████▊| 24607/25000 [09:34<00:10, 37.09it/s]

Ep 24600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 7


 99%|█████████▉| 24703/25000 [09:37<00:08, 34.05it/s]

Ep 24700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 7


 99%|█████████▉| 24806/25000 [09:39<00:04, 44.80it/s]

Ep 24800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


100%|█████████▉| 24906/25000 [09:41<00:02, 45.38it/s]

Ep 24900/25000, Opt. Action: 7, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 7


100%|██████████| 25000/25000 [09:43<00:00, 42.82it/s]


Ep 25000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7

TEST:


 40%|████      | 121/300 [00:00<00:01, 145.37it/s]

Ep 100/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 71%|███████   | 213/300 [00:01<00:00, 140.62it/s]

Ep 200/300, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


100%|██████████| 300/300 [00:02<00:00, 144.37it/s]


Ep 300/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 7

GAMMA 0.8 - LR 0.01 - Entropy Decay False


  0%|          | 105/25000 [00:02<11:42, 35.44it/s]

Ep 100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


  1%|          | 207/25000 [00:05<10:20, 39.99it/s]

Ep 200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


  1%|          | 307/25000 [00:07<08:52, 46.33it/s]

Ep 300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


  2%|▏         | 407/25000 [00:09<09:09, 44.78it/s]

Ep 400/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.7, First Action 1


  2%|▏         | 507/25000 [00:12<08:50, 46.19it/s]

Ep 500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.7, First Action 1


  2%|▏         | 607/25000 [00:14<08:39, 46.91it/s]

Ep 600/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.1, First Action 1


  3%|▎         | 708/25000 [00:16<10:28, 38.67it/s]

Ep 700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


  3%|▎         | 807/25000 [00:19<09:47, 41.19it/s]

Ep 800/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.1, First Action 1


  4%|▎         | 907/25000 [00:21<08:39, 46.39it/s]

Ep 900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.7, First Action 1


  4%|▍         | 1007/25000 [00:23<08:34, 46.65it/s]

Ep 1000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


  4%|▍         | 1107/25000 [00:25<08:30, 46.76it/s]

Ep 1100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  5%|▍         | 1207/25000 [00:28<08:34, 46.28it/s]

Ep 1200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


  5%|▌         | 1307/25000 [00:30<10:46, 36.64it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 1


  6%|▌         | 1407/25000 [00:33<09:31, 41.27it/s]

Ep 1400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


  6%|▌         | 1507/25000 [00:35<08:23, 46.65it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.8, First Action 1


  6%|▋         | 1607/25000 [00:37<08:13, 47.36it/s]

Ep 1600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


  7%|▋         | 1707/25000 [00:39<08:31, 45.57it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.8, First Action 1


  7%|▋         | 1807/25000 [00:42<08:23, 46.10it/s]

Ep 1800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


  8%|▊         | 1907/25000 [00:44<10:25, 36.89it/s]

Ep 1900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 1


  8%|▊         | 2007/25000 [00:47<09:14, 41.44it/s]

Ep 2000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


  8%|▊         | 2107/25000 [00:49<08:15, 46.19it/s]

Ep 2100/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.8, First Action 1


  9%|▉         | 2207/25000 [00:51<08:06, 46.84it/s]

Ep 2200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


  9%|▉         | 2307/25000 [00:53<07:57, 47.48it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 10%|▉         | 2407/25000 [00:56<07:58, 47.19it/s]

Ep 2400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 10%|█         | 2504/25000 [00:58<10:05, 37.13it/s]

Ep 2500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 10%|█         | 2607/25000 [01:01<09:13, 40.44it/s]

Ep 2600/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.8, First Action 1


 11%|█         | 2707/25000 [01:03<07:57, 46.69it/s]

Ep 2700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.0, First Action 1


 11%|█         | 2807/25000 [01:05<07:53, 46.88it/s]

Ep 2800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 12%|█▏        | 2907/25000 [01:07<08:01, 45.91it/s]

Ep 2900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 12%|█▏        | 3007/25000 [01:10<07:57, 46.10it/s]

Ep 3000/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.0, First Action 1


 12%|█▏        | 3107/25000 [01:12<10:19, 35.33it/s]

Ep 3100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 13%|█▎        | 3207/25000 [01:15<08:54, 40.79it/s]

Ep 3200/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.3, First Action 1


 13%|█▎        | 3307/25000 [01:17<07:54, 45.74it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.8, First Action 1


 14%|█▎        | 3407/25000 [01:19<07:42, 46.72it/s]

Ep 3400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.7, First Action 1


 14%|█▍        | 3507/25000 [01:21<07:35, 47.19it/s]

Ep 3500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 1


 14%|█▍        | 3607/25000 [01:23<07:39, 46.53it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 15%|█▍        | 3704/25000 [01:26<09:15, 38.36it/s]

Ep 3700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 1


 15%|█▌        | 3809/25000 [01:29<08:12, 42.99it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 16%|█▌        | 3909/25000 [01:31<07:47, 45.12it/s]

Ep 3900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.7, First Action 1


 16%|█▌        | 4009/25000 [01:33<07:29, 46.74it/s]

Ep 4000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 16%|█▋        | 4109/25000 [01:35<07:23, 47.08it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 1


 17%|█▋        | 4209/25000 [01:37<07:13, 47.99it/s]

Ep 4200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 17%|█▋        | 4305/25000 [01:40<09:58, 34.58it/s]

Ep 4300/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.6, First Action 1


 18%|█▊        | 4405/25000 [01:43<07:54, 43.38it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 18%|█▊        | 4505/25000 [01:45<07:31, 45.41it/s]

Ep 4500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 18%|█▊        | 4605/25000 [01:47<07:30, 45.30it/s]

Ep 4600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 19%|█▉        | 4705/25000 [01:49<07:21, 45.95it/s]

Ep 4700/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.7, First Action 1


 19%|█▉        | 4805/25000 [01:51<07:19, 45.95it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.3, First Action 1


 20%|█▉        | 4904/25000 [01:54<08:41, 38.53it/s]

Ep 4900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 20%|██        | 5007/25000 [01:57<08:18, 40.14it/s]

Ep 5000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 20%|██        | 5107/25000 [01:59<07:04, 46.87it/s]

Ep 5100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 21%|██        | 5207/25000 [02:01<07:03, 46.79it/s]

Ep 5200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 21%|██        | 5307/25000 [02:03<07:02, 46.65it/s]

Ep 5300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 22%|██▏       | 5407/25000 [02:05<06:57, 46.88it/s]

Ep 5400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 22%|██▏       | 5504/25000 [02:08<08:46, 37.02it/s]

Ep 5500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 22%|██▏       | 5605/25000 [02:11<08:41, 37.16it/s]

Ep 5600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 23%|██▎       | 5705/25000 [02:13<06:49, 47.14it/s]

Ep 5700/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.8, First Action 1


 23%|██▎       | 5805/25000 [02:15<07:21, 43.52it/s]

Ep 5800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 24%|██▎       | 5905/25000 [02:17<07:02, 45.20it/s]

Ep 5900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 24%|██▍       | 6005/25000 [02:19<06:55, 45.70it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 24%|██▍       | 6106/25000 [02:22<09:21, 33.66it/s]

Ep 6100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 25%|██▍       | 6209/25000 [02:25<07:35, 41.23it/s]

Ep 6200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 25%|██▌       | 6309/25000 [02:27<06:51, 45.45it/s]

Ep 6300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 26%|██▌       | 6409/25000 [02:29<06:41, 46.35it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 26%|██▌       | 6509/25000 [02:31<06:37, 46.53it/s]

Ep 6500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 26%|██▋       | 6609/25000 [02:33<06:31, 46.92it/s]

Ep 6600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 27%|██▋       | 6705/25000 [02:36<08:01, 38.03it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.9, First Action 1


 27%|██▋       | 6806/25000 [02:39<07:48, 38.87it/s]

Ep 6800/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.4, First Action 1


 28%|██▊       | 6906/25000 [02:41<06:43, 44.80it/s]

Ep 6900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 28%|██▊       | 7006/25000 [02:43<06:41, 44.78it/s]

Ep 7000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 28%|██▊       | 7106/25000 [02:45<06:26, 46.27it/s]

Ep 7100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 29%|██▉       | 7206/25000 [02:47<06:22, 46.54it/s]

Ep 7200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.0, First Action 1


 29%|██▉       | 7307/25000 [02:50<08:12, 35.94it/s]

Ep 7300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.0, First Action 1


 30%|██▉       | 7405/25000 [02:53<07:21, 39.89it/s]

Ep 7400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 30%|███       | 7505/25000 [02:55<06:13, 46.90it/s]

Ep 7500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 30%|███       | 7605/25000 [02:57<06:19, 45.85it/s]

Ep 7600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 31%|███       | 7705/25000 [02:59<06:08, 46.95it/s]

Ep 7700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 31%|███       | 7805/25000 [03:01<06:07, 46.73it/s]

Ep 7800/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.2, First Action 1


 32%|███▏      | 7905/25000 [03:04<07:46, 36.65it/s]

Ep 7900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.9, First Action 1


 32%|███▏      | 8008/25000 [03:07<06:48, 41.64it/s]

Ep 8000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 32%|███▏      | 8108/25000 [03:09<06:00, 46.91it/s]

Ep 8100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 33%|███▎      | 8208/25000 [03:11<06:11, 45.17it/s]

Ep 8200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 1


 33%|███▎      | 8308/25000 [03:13<06:05, 45.73it/s]

Ep 8300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 34%|███▎      | 8408/25000 [03:15<06:03, 45.66it/s]

Ep 8400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 34%|███▍      | 8507/25000 [03:18<07:00, 39.21it/s]

Ep 8500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 34%|███▍      | 8605/25000 [03:20<07:27, 36.62it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.8, First Action 1


 35%|███▍      | 8705/25000 [03:23<05:48, 46.76it/s]

Ep 8700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 35%|███▌      | 8805/25000 [03:25<05:52, 45.97it/s]

Ep 8800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 36%|███▌      | 8905/25000 [03:27<05:57, 45.02it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 36%|███▌      | 9005/25000 [03:29<05:43, 46.56it/s]

Ep 9000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 36%|███▋      | 9107/25000 [03:32<06:42, 39.44it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 37%|███▋      | 9207/25000 [03:35<07:32, 34.87it/s]

Ep 9200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 37%|███▋      | 9307/25000 [03:37<05:34, 46.88it/s]

Ep 9300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 38%|███▊      | 9407/25000 [03:39<05:49, 44.62it/s]

Ep 9400/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.1, First Action 1


 38%|███▊      | 9507/25000 [03:41<05:54, 43.66it/s]

Ep 9500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 38%|███▊      | 9607/25000 [03:43<05:38, 45.43it/s]

Ep 9600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 39%|███▉      | 9704/25000 [03:46<06:43, 37.93it/s]

Ep 9700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 39%|███▉      | 9804/25000 [03:49<07:31, 33.68it/s]

Ep 9800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 40%|███▉      | 9907/25000 [03:51<05:33, 45.23it/s]

Ep 9900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 40%|████      | 10007/25000 [03:53<05:29, 45.55it/s]

Ep 10000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 40%|████      | 10107/25000 [03:55<05:19, 46.65it/s]

Ep 10100/25000, Opt. Action: 3, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.4, First Action 1


 41%|████      | 10207/25000 [03:57<05:21, 45.98it/s]

Ep 10200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 41%|████      | 10305/25000 [04:00<06:31, 37.54it/s]

Ep 10300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 42%|████▏     | 10405/25000 [04:03<07:16, 33.41it/s]

Ep 10400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 42%|████▏     | 10504/25000 [04:05<05:16, 45.75it/s]

Ep 10500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 42%|████▏     | 10609/25000 [04:07<05:13, 45.92it/s]

Ep 10600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 43%|████▎     | 10709/25000 [04:09<05:11, 45.91it/s]

Ep 10700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 43%|████▎     | 10809/25000 [04:11<05:03, 46.72it/s]

Ep 10800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.2, First Action 1


 44%|████▎     | 10907/25000 [04:14<06:16, 37.46it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.4, First Action 1


 44%|████▍     | 11004/25000 [04:17<06:36, 35.30it/s]

Ep 11000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 44%|████▍     | 11107/25000 [04:19<05:04, 45.61it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 45%|████▍     | 11207/25000 [04:21<05:01, 45.73it/s]

Ep 11200/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.8, First Action 1


 45%|████▌     | 11307/25000 [04:23<04:54, 46.45it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 1


 46%|████▌     | 11407/25000 [04:25<04:47, 47.31it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 46%|████▌     | 11506/25000 [04:28<06:20, 35.51it/s]

Ep 11500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 46%|████▋     | 11605/25000 [04:31<06:33, 34.06it/s]

Ep 11600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 47%|████▋     | 11707/25000 [04:33<04:49, 45.89it/s]

Ep 11700/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.6, First Action 1


 47%|████▋     | 11807/25000 [04:35<04:48, 45.74it/s]

Ep 11800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 48%|████▊     | 11907/25000 [04:37<04:51, 44.85it/s]

Ep 11900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 1


 48%|████▊     | 12007/25000 [04:39<04:48, 45.09it/s]

Ep 12000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 48%|████▊     | 12107/25000 [04:42<05:42, 37.65it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 49%|████▉     | 12207/25000 [04:45<06:22, 33.42it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 49%|████▉     | 12309/25000 [04:47<04:35, 46.05it/s]

Ep 12300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 50%|████▉     | 12409/25000 [04:49<04:30, 46.61it/s]

Ep 12400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 50%|█████     | 12509/25000 [04:51<04:30, 46.11it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.6, First Action 1


 50%|█████     | 12609/25000 [04:53<04:25, 46.60it/s]

Ep 12600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 51%|█████     | 12705/25000 [04:56<05:46, 35.44it/s]

Ep 12700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 51%|█████     | 12805/25000 [04:59<05:49, 34.88it/s]

Ep 12800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 1


 52%|█████▏    | 12905/25000 [05:01<04:27, 45.29it/s]

Ep 12900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 52%|█████▏    | 13005/25000 [05:03<04:26, 45.05it/s]

Ep 13000/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.7, First Action 1


 52%|█████▏    | 13105/25000 [05:05<04:19, 45.84it/s]

Ep 13100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 53%|█████▎    | 13205/25000 [05:07<04:15, 46.14it/s]

Ep 13200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 1


 53%|█████▎    | 13307/25000 [05:10<05:25, 35.88it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 54%|█████▎    | 13404/25000 [05:13<05:29, 35.21it/s]

Ep 13400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 54%|█████▍    | 13508/25000 [05:15<04:11, 45.64it/s]

Ep 13500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.5, First Action 1


 54%|█████▍    | 13608/25000 [05:17<04:08, 45.89it/s]

Ep 13600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 55%|█████▍    | 13708/25000 [05:19<04:04, 46.20it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 1


 55%|█████▌    | 13808/25000 [05:21<03:59, 46.71it/s]

Ep 13800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 56%|█████▌    | 13907/25000 [05:24<04:50, 38.21it/s]

Ep 13900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 56%|█████▌    | 14006/25000 [05:26<05:14, 34.97it/s]

Ep 14000/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.6, First Action 1


 56%|█████▋    | 14106/25000 [05:29<03:59, 45.51it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 1


 57%|█████▋    | 14206/25000 [05:31<04:00, 44.84it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 57%|█████▋    | 14306/25000 [05:33<03:57, 45.02it/s]

Ep 14300/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.7, First Action 1


 58%|█████▊    | 14406/25000 [05:35<03:48, 46.30it/s]

Ep 14400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 58%|█████▊    | 14505/25000 [05:38<04:37, 37.82it/s]

Ep 14500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 58%|█████▊    | 14605/25000 [05:40<05:06, 33.90it/s]

Ep 14600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 59%|█████▉    | 14708/25000 [05:43<03:48, 45.06it/s]

Ep 14700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 59%|█████▉    | 14808/25000 [05:45<03:39, 46.38it/s]

Ep 14800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 60%|█████▉    | 14908/25000 [05:47<03:39, 46.00it/s]

Ep 14900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.2, First Action 1


 60%|██████    | 15008/25000 [05:50<03:31, 47.16it/s]

Ep 15000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 60%|██████    | 15106/25000 [05:52<04:27, 36.97it/s]

Ep 15100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 61%|██████    | 15206/25000 [05:55<04:47, 34.09it/s]

Ep 15200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 61%|██████    | 15308/25000 [05:57<03:32, 45.54it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 62%|██████▏   | 15408/25000 [05:59<03:32, 45.19it/s]

Ep 15400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 62%|██████▏   | 15508/25000 [06:01<03:24, 46.36it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.5, First Action 1


 62%|██████▏   | 15608/25000 [06:04<03:22, 46.46it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 63%|██████▎   | 15703/25000 [06:06<03:46, 41.09it/s]

Ep 15700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 63%|██████▎   | 15804/25000 [06:09<04:26, 34.54it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 1


 64%|██████▎   | 15905/25000 [06:11<03:19, 45.67it/s]

Ep 15900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 64%|██████▍   | 16005/25000 [06:13<03:12, 46.76it/s]

Ep 16000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 64%|██████▍   | 16105/25000 [06:15<03:12, 46.28it/s]

Ep 16100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 65%|██████▍   | 16205/25000 [06:18<03:08, 46.68it/s]

Ep 16200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 65%|██████▌   | 16305/25000 [06:20<03:51, 37.52it/s]

Ep 16300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 66%|██████▌   | 16405/25000 [06:23<04:11, 34.16it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 66%|██████▌   | 16509/25000 [06:25<03:08, 45.08it/s]

Ep 16500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 66%|██████▋   | 16609/25000 [06:27<03:03, 45.69it/s]

Ep 16600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 67%|██████▋   | 16709/25000 [06:29<02:57, 46.83it/s]

Ep 16700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 67%|██████▋   | 16809/25000 [06:32<03:01, 45.23it/s]

Ep 16800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 68%|██████▊   | 16903/25000 [06:34<03:40, 36.70it/s]

Ep 16900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 68%|██████▊   | 17004/25000 [06:37<04:18, 30.90it/s]

Ep 17000/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.5, First Action 1


 68%|██████▊   | 17105/25000 [06:39<02:55, 45.11it/s]

Ep 17100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 69%|██████▉   | 17205/25000 [06:41<02:47, 46.64it/s]

Ep 17200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 69%|██████▉   | 17305/25000 [06:44<02:43, 47.07it/s]

Ep 17300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 70%|██████▉   | 17405/25000 [06:46<02:44, 46.15it/s]

Ep 17400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 70%|███████   | 17507/25000 [06:48<03:24, 36.71it/s]

Ep 17500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 70%|███████   | 17603/25000 [06:51<03:43, 33.13it/s]

Ep 17600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 71%|███████   | 17709/25000 [06:53<02:37, 46.39it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.4, First Action 1


 71%|███████   | 17809/25000 [06:55<02:33, 46.72it/s]

Ep 17800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 72%|███████▏  | 17909/25000 [06:58<02:35, 45.57it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 72%|███████▏  | 18009/25000 [07:00<02:30, 46.35it/s]

Ep 18000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 72%|███████▏  | 18107/25000 [07:02<03:00, 38.09it/s]

Ep 18100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 73%|███████▎  | 18203/25000 [07:05<03:23, 33.46it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 73%|███████▎  | 18307/25000 [07:07<02:23, 46.79it/s]

Ep 18300/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.0, First Action 1


 74%|███████▎  | 18407/25000 [07:09<02:19, 47.17it/s]

Ep 18400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 74%|███████▍  | 18507/25000 [07:12<02:22, 45.52it/s]

Ep 18500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 74%|███████▍  | 18607/25000 [07:14<02:23, 44.67it/s]

Ep 18600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 75%|███████▍  | 18705/25000 [07:16<02:51, 36.77it/s]

Ep 18700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.3, First Action 1


 75%|███████▌  | 18806/25000 [07:19<03:08, 32.78it/s]

Ep 18800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 76%|███████▌  | 18907/25000 [07:21<02:11, 46.17it/s]

Ep 18900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.0, First Action 1


 76%|███████▌  | 19007/25000 [07:23<02:08, 46.76it/s]

Ep 19000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.1, First Action 1


 76%|███████▋  | 19107/25000 [07:26<02:07, 46.25it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 77%|███████▋  | 19207/25000 [07:28<02:07, 45.49it/s]

Ep 19200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 77%|███████▋  | 19305/25000 [07:30<02:28, 38.35it/s]

Ep 19300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.8, First Action 1


 78%|███████▊  | 19406/25000 [07:33<02:43, 34.19it/s]

Ep 19400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.2, First Action 1


 78%|███████▊  | 19506/25000 [07:35<01:57, 46.86it/s]

Ep 19500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 78%|███████▊  | 19606/25000 [07:37<01:54, 46.91it/s]

Ep 19600/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.8, First Action 1


 79%|███████▉  | 19706/25000 [07:40<01:52, 47.19it/s]

Ep 19700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 1


 79%|███████▉  | 19806/25000 [07:42<01:54, 45.22it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 80%|███████▉  | 19905/25000 [07:44<02:18, 36.73it/s]

Ep 19900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 80%|████████  | 20006/25000 [07:47<02:31, 32.92it/s]

Ep 20000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.1, First Action 1


 80%|████████  | 20109/25000 [07:49<01:44, 46.69it/s]

Ep 20100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 81%|████████  | 20209/25000 [07:52<01:43, 46.11it/s]

Ep 20200/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.4, First Action 1


 81%|████████  | 20309/25000 [07:54<01:40, 46.54it/s]

Ep 20300/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 1


 82%|████████▏ | 20409/25000 [07:56<01:39, 45.95it/s]

Ep 20400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 82%|████████▏ | 20507/25000 [07:58<01:59, 37.63it/s]

Ep 20500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 82%|████████▏ | 20604/25000 [08:01<02:11, 33.50it/s]

Ep 20600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 83%|████████▎ | 20706/25000 [08:03<01:33, 45.94it/s]

Ep 20700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 1


 83%|████████▎ | 20806/25000 [08:06<01:28, 47.24it/s]

Ep 20800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 1


 84%|████████▎ | 20906/25000 [08:08<01:27, 46.68it/s]

Ep 20900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.0, First Action 1


 84%|████████▍ | 21006/25000 [08:10<01:29, 44.66it/s]

Ep 21000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 84%|████████▍ | 21106/25000 [08:12<01:38, 39.47it/s]

Ep 21100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 85%|████████▍ | 21204/25000 [08:15<01:53, 33.34it/s]

Ep 21200/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.0, First Action 1


 85%|████████▌ | 21305/25000 [08:17<01:20, 46.10it/s]

Ep 21300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 86%|████████▌ | 21405/25000 [08:20<01:17, 46.49it/s]

Ep 21400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.7, First Action 1


 86%|████████▌ | 21505/25000 [08:22<01:15, 46.10it/s]

Ep 21500/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.8, First Action 1


 86%|████████▋ | 21605/25000 [08:24<01:15, 44.96it/s]

Ep 21600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 87%|████████▋ | 21705/25000 [08:26<01:16, 42.99it/s]

Ep 21700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 87%|████████▋ | 21805/25000 [08:29<01:30, 35.27it/s]

Ep 21800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 1


 88%|████████▊ | 21909/25000 [08:31<01:07, 45.98it/s]

Ep 21900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 88%|████████▊ | 22009/25000 [08:34<01:03, 47.04it/s]

Ep 22000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.2, First Action 1


 88%|████████▊ | 22109/25000 [08:36<01:02, 46.24it/s]

Ep 22100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 89%|████████▉ | 22204/25000 [08:38<01:01, 45.30it/s]

Ep 22200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 89%|████████▉ | 22304/25000 [08:40<00:58, 45.74it/s]

Ep 22300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 90%|████████▉ | 22404/25000 [08:43<01:13, 35.39it/s]

Ep 22400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 90%|█████████ | 22507/25000 [08:45<00:56, 44.31it/s]

Ep 22500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 1


 90%|█████████ | 22607/25000 [08:48<00:51, 46.32it/s]

Ep 22600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 91%|█████████ | 22707/25000 [08:50<00:50, 45.56it/s]

Ep 22700/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.2, First Action 1


 91%|█████████ | 22807/25000 [08:52<00:50, 43.60it/s]

Ep 22800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 92%|█████████▏| 22907/25000 [08:54<00:51, 40.70it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.4, First Action 1


 92%|█████████▏| 23004/25000 [08:57<00:59, 33.34it/s]

Ep 23000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.1, First Action 1


 92%|█████████▏| 23105/25000 [09:00<00:40, 46.33it/s]

Ep 23100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 93%|█████████▎| 23205/25000 [09:02<00:38, 46.48it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 93%|█████████▎| 23305/25000 [09:04<00:37, 45.09it/s]

Ep 23300/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.8, First Action 1


 94%|█████████▎| 23405/25000 [09:06<00:35, 45.10it/s]

Ep 23400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 1


 94%|█████████▍| 23505/25000 [09:08<00:37, 40.28it/s]

Ep 23500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 94%|█████████▍| 23603/25000 [09:11<00:40, 34.63it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 95%|█████████▍| 23709/25000 [09:14<00:27, 46.32it/s]

Ep 23700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 1


 95%|█████████▌| 23809/25000 [09:16<00:26, 44.99it/s]

Ep 23800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 96%|█████████▌| 23905/25000 [09:18<00:23, 45.73it/s]

Ep 23900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 96%|█████████▌| 24005/25000 [09:20<00:21, 46.90it/s]

Ep 24000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.3, First Action 1


 96%|█████████▋| 24105/25000 [09:22<00:20, 42.93it/s]

Ep 24100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 97%|█████████▋| 24205/25000 [09:25<00:23, 33.93it/s]

Ep 24200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 97%|█████████▋| 24307/25000 [09:28<00:15, 45.60it/s]

Ep 24300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 98%|█████████▊| 24407/25000 [09:30<00:13, 45.55it/s]

Ep 24400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 1


 98%|█████████▊| 24507/25000 [09:32<00:10, 45.67it/s]

Ep 24500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 98%|█████████▊| 24607/25000 [09:34<00:08, 45.10it/s]

Ep 24600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 99%|█████████▉| 24707/25000 [09:36<00:07, 39.15it/s]

Ep 24700/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.4, First Action 1


 99%|█████████▉| 24803/25000 [09:39<00:05, 33.85it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.4, First Action 1


100%|█████████▉| 24909/25000 [09:42<00:02, 45.26it/s]

Ep 24900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


100%|██████████| 25000/25000 [09:44<00:00, 42.79it/s]


Ep 25000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 1

TEST:


 41%|████      | 122/300 [00:00<00:01, 145.44it/s]

Ep 100/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 77%|███████▋  | 230/300 [00:01<00:00, 148.41it/s]

Ep 200/300, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.8, First Action 1


100%|██████████| 300/300 [00:02<00:00, 141.91it/s]


Ep 300/300, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.7, First Action 1

GAMMA 1 - LR 0.001 - Entropy Decay True


  0%|          | 109/25000 [00:02<08:39, 47.95it/s]

Ep 100/25000, Opt. Action: 4, Reward: 4.0, Cumulative-Regret: 21.0, AVG100-Regret: 17.9, First Action 10


  1%|          | 204/25000 [00:04<10:21, 39.92it/s]

Ep 200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


  1%|          | 303/25000 [00:07<11:52, 34.66it/s]

Ep 300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


  2%|▏         | 406/25000 [00:09<08:52, 46.20it/s]

Ep 400/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 4


  2%|▏         | 506/25000 [00:11<08:54, 45.80it/s]

Ep 500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  2%|▏         | 606/25000 [00:14<08:56, 45.45it/s]

Ep 600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


  3%|▎         | 706/25000 [00:16<08:51, 45.69it/s]

Ep 700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


  3%|▎         | 805/25000 [00:18<10:49, 37.23it/s]

Ep 800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


  4%|▎         | 906/25000 [00:21<11:31, 34.82it/s]

Ep 900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


  4%|▍         | 1007/25000 [00:23<08:51, 45.17it/s]

Ep 1000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


  4%|▍         | 1107/25000 [00:25<08:46, 45.35it/s]

Ep 1100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


  5%|▍         | 1207/25000 [00:28<08:34, 46.22it/s]

Ep 1200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 6


  5%|▌         | 1307/25000 [00:30<08:33, 46.18it/s]

Ep 1300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


  6%|▌         | 1407/25000 [00:32<09:20, 42.11it/s]

Ep 1400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


  6%|▌         | 1504/25000 [00:35<10:49, 36.19it/s]

Ep 1500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


  6%|▋         | 1607/25000 [00:37<08:27, 46.12it/s]

Ep 1600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


  7%|▋         | 1707/25000 [00:39<08:33, 45.37it/s]

Ep 1700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


  7%|▋         | 1807/25000 [00:42<08:17, 46.61it/s]

Ep 1800/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.3, First Action 5


  8%|▊         | 1907/25000 [00:44<08:28, 45.44it/s]

Ep 1900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 5


  8%|▊         | 2007/25000 [00:46<08:15, 46.41it/s]

Ep 2000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


  8%|▊         | 2107/25000 [00:49<10:32, 36.21it/s]

Ep 2100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


  9%|▉         | 2205/25000 [00:51<08:19, 45.62it/s]

Ep 2200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


  9%|▉         | 2305/25000 [00:53<08:30, 44.42it/s]

Ep 2300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 10%|▉         | 2405/25000 [00:55<08:12, 45.90it/s]

Ep 2400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 5


 10%|█         | 2505/25000 [00:58<08:08, 46.00it/s]

Ep 2500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 10%|█         | 2605/25000 [01:00<08:17, 45.06it/s]

Ep 2600/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 1


 11%|█         | 2704/25000 [01:02<09:53, 37.55it/s]

Ep 2700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 11%|█         | 2807/25000 [01:05<07:57, 46.48it/s]

Ep 2800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 12%|█▏        | 2907/25000 [01:07<08:06, 45.41it/s]

Ep 2900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 12%|█▏        | 3007/25000 [01:09<07:48, 46.93it/s]

Ep 3000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 12%|█▏        | 3107/25000 [01:12<07:54, 46.15it/s]

Ep 3100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 6


 13%|█▎        | 3207/25000 [01:14<07:56, 45.72it/s]

Ep 3200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 13%|█▎        | 3304/25000 [01:16<09:41, 37.29it/s]

Ep 3300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 5


 14%|█▎        | 3408/25000 [01:19<07:53, 45.58it/s]

Ep 3400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 5


 14%|█▍        | 3508/25000 [01:21<07:54, 45.27it/s]

Ep 3500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 14%|█▍        | 3608/25000 [01:23<07:51, 45.41it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 15%|█▍        | 3708/25000 [01:26<07:39, 46.30it/s]

Ep 3700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 15%|█▌        | 3808/25000 [01:28<07:37, 46.28it/s]

Ep 3800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 16%|█▌        | 3906/25000 [01:30<10:10, 34.53it/s]

Ep 3900/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 16%|█▌        | 4005/25000 [01:33<07:40, 45.62it/s]

Ep 4000/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 6


 16%|█▋        | 4105/25000 [01:35<07:34, 45.99it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 17%|█▋        | 4205/25000 [01:37<07:33, 45.86it/s]

Ep 4200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 17%|█▋        | 4305/25000 [01:40<07:33, 45.64it/s]

Ep 4300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 18%|█▊        | 4405/25000 [01:42<07:26, 46.13it/s]

Ep 4400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 18%|█▊        | 4505/25000 [01:44<09:12, 37.07it/s]

Ep 4500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 6


 18%|█▊        | 4609/25000 [01:47<07:28, 45.43it/s]

Ep 4600/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 19%|█▉        | 4709/25000 [01:49<07:26, 45.47it/s]

Ep 4700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 19%|█▉        | 4809/25000 [01:52<07:19, 45.92it/s]

Ep 4800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 20%|█▉        | 4909/25000 [01:54<07:15, 46.11it/s]

Ep 4900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 20%|██        | 5009/25000 [01:56<07:07, 46.73it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 6


 20%|██        | 5107/25000 [01:58<08:56, 37.07it/s]

Ep 5100/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 6


 21%|██        | 5205/25000 [02:01<07:25, 44.46it/s]

Ep 5200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 21%|██        | 5305/25000 [02:03<07:03, 46.54it/s]

Ep 5300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 22%|██▏       | 5405/25000 [02:05<06:57, 46.89it/s]

Ep 5400/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 6


 22%|██▏       | 5505/25000 [02:08<07:06, 45.74it/s]

Ep 5500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 22%|██▏       | 5605/25000 [02:10<06:58, 46.29it/s]

Ep 5600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 23%|██▎       | 5705/25000 [02:12<08:34, 37.48it/s]

Ep 5700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 23%|██▎       | 5808/25000 [02:15<07:30, 42.60it/s]

Ep 5800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 24%|██▎       | 5908/25000 [02:17<07:07, 44.65it/s]

Ep 5900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 6


 24%|██▍       | 6008/25000 [02:19<06:50, 46.29it/s]

Ep 6000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 24%|██▍       | 6108/25000 [02:22<06:37, 47.47it/s]

Ep 6100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 25%|██▍       | 6208/25000 [02:24<06:44, 46.41it/s]

Ep 6200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 6


 25%|██▌       | 6305/25000 [02:26<08:28, 36.79it/s]

Ep 6300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 26%|██▌       | 6405/25000 [02:29<07:22, 42.03it/s]

Ep 6400/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 6


 26%|██▌       | 6505/25000 [02:31<06:33, 47.01it/s]

Ep 6500/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 6


 26%|██▋       | 6605/25000 [02:33<06:32, 46.92it/s]

Ep 6600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 27%|██▋       | 6705/25000 [02:35<06:30, 46.90it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 27%|██▋       | 6805/25000 [02:38<06:37, 45.76it/s]

Ep 6800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 28%|██▊       | 6908/25000 [02:40<07:48, 38.62it/s]

Ep 6900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 6


 28%|██▊       | 7007/25000 [02:43<07:16, 41.21it/s]

Ep 7000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 28%|██▊       | 7107/25000 [02:45<06:34, 45.37it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 29%|██▉       | 7207/25000 [02:47<06:36, 44.83it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 29%|██▉       | 7307/25000 [02:49<06:26, 45.80it/s]

Ep 7300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 30%|██▉       | 7407/25000 [02:52<06:13, 47.15it/s]

Ep 7400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 30%|███       | 7508/25000 [02:54<07:38, 38.18it/s]

Ep 7500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 30%|███       | 7608/25000 [02:57<07:02, 41.12it/s]

Ep 7600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 31%|███       | 7708/25000 [02:59<06:09, 46.77it/s]

Ep 7700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 31%|███       | 7808/25000 [03:01<06:11, 46.31it/s]

Ep 7800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 32%|███▏      | 7908/25000 [03:03<06:09, 46.21it/s]

Ep 7900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 32%|███▏      | 8008/25000 [03:06<06:00, 47.16it/s]

Ep 8000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 32%|███▏      | 8105/25000 [03:08<07:22, 38.16it/s]

Ep 8100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 6


 33%|███▎      | 8204/25000 [03:11<08:13, 34.04it/s]

Ep 8200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 33%|███▎      | 8308/25000 [03:13<06:02, 46.00it/s]

Ep 8300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 34%|███▎      | 8408/25000 [03:15<05:55, 46.62it/s]

Ep 8400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 34%|███▍      | 8508/25000 [03:17<06:02, 45.51it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 34%|███▍      | 8608/25000 [03:19<05:51, 46.70it/s]

Ep 8600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 35%|███▍      | 8707/25000 [03:22<07:34, 35.82it/s]

Ep 8700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 35%|███▌      | 8805/25000 [03:25<07:41, 35.06it/s]

Ep 8800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 36%|███▌      | 8908/25000 [03:27<05:44, 46.77it/s]

Ep 8900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 36%|███▌      | 9008/25000 [03:29<05:40, 46.91it/s]

Ep 9000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 36%|███▋      | 9108/25000 [03:31<05:48, 45.59it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 37%|███▋      | 9208/25000 [03:33<05:40, 46.44it/s]

Ep 9200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 37%|███▋      | 9304/25000 [03:36<07:35, 34.43it/s]

Ep 9300/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 6


 38%|███▊      | 9404/25000 [03:39<07:16, 35.77it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 38%|███▊      | 9506/25000 [03:41<05:27, 47.37it/s]

Ep 9500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 6


 38%|███▊      | 9606/25000 [03:43<05:32, 46.29it/s]

Ep 9600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 39%|███▉      | 9706/25000 [03:45<05:44, 44.36it/s]

Ep 9700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 39%|███▉      | 9806/25000 [03:47<05:32, 45.66it/s]

Ep 9800/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 6


 40%|███▉      | 9907/25000 [03:50<06:33, 38.37it/s]

Ep 9900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 40%|████      | 10004/25000 [03:52<07:17, 34.25it/s]

Ep 10000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 40%|████      | 10109/25000 [03:55<05:18, 46.73it/s]

Ep 10100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 41%|████      | 10209/25000 [03:57<05:17, 46.52it/s]

Ep 10200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 41%|████      | 10309/25000 [03:59<05:13, 46.83it/s]

Ep 10300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 42%|████▏     | 10409/25000 [04:01<05:19, 45.60it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 42%|████▏     | 10506/25000 [04:04<06:25, 37.56it/s]

Ep 10500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 42%|████▏     | 10604/25000 [04:06<07:05, 33.81it/s]

Ep 10600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 43%|████▎     | 10706/25000 [04:09<05:10, 46.05it/s]

Ep 10700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 43%|████▎     | 10806/25000 [04:11<05:07, 46.10it/s]

Ep 10800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 44%|████▎     | 10906/25000 [04:13<05:08, 45.69it/s]

Ep 10900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 44%|████▍     | 11006/25000 [04:15<05:29, 42.50it/s]

Ep 11000/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 6


 44%|████▍     | 11105/25000 [04:18<06:07, 37.77it/s]

Ep 11100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 45%|████▍     | 11204/25000 [04:20<06:30, 35.35it/s]

Ep 11200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 45%|████▌     | 11306/25000 [04:23<04:54, 46.43it/s]

Ep 11300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 46%|████▌     | 11406/25000 [04:25<04:47, 47.24it/s]

Ep 11400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 46%|████▌     | 11506/25000 [04:27<04:59, 45.10it/s]

Ep 11500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 46%|████▋     | 11606/25000 [04:29<04:51, 46.01it/s]

Ep 11600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 47%|████▋     | 11706/25000 [04:32<05:33, 39.89it/s]

Ep 11700/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 6


 47%|████▋     | 11805/25000 [04:34<06:23, 34.39it/s]

Ep 11800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 48%|████▊     | 11905/25000 [04:37<04:36, 47.28it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 6


 48%|████▊     | 12005/25000 [04:39<04:31, 47.94it/s]

Ep 12000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 48%|████▊     | 12105/25000 [04:41<04:41, 45.84it/s]

Ep 12100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 49%|████▉     | 12205/25000 [04:43<04:41, 45.51it/s]

Ep 12200/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 6


 49%|████▉     | 12305/25000 [04:46<04:37, 45.68it/s]

Ep 12300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 50%|████▉     | 12406/25000 [04:48<05:41, 36.83it/s]

Ep 12400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 50%|█████     | 12508/25000 [04:51<04:27, 46.67it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 50%|█████     | 12608/25000 [04:53<04:23, 46.95it/s]

Ep 12600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 51%|█████     | 12708/25000 [04:55<04:22, 46.80it/s]

Ep 12700/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 6


 51%|█████     | 12808/25000 [04:57<04:27, 45.56it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 52%|█████▏    | 12908/25000 [05:00<04:20, 46.43it/s]

Ep 12900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 52%|█████▏    | 13006/25000 [05:02<05:19, 37.53it/s]

Ep 13000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 52%|█████▏    | 13108/25000 [05:05<04:16, 46.43it/s]

Ep 13100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 53%|█████▎    | 13208/25000 [05:07<04:10, 47.02it/s]

Ep 13200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 53%|█████▎    | 13308/25000 [05:09<04:16, 45.62it/s]

Ep 13300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 54%|█████▎    | 13408/25000 [05:11<04:17, 44.99it/s]

Ep 13400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 54%|█████▍    | 13508/25000 [05:13<04:09, 46.04it/s]

Ep 13500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 54%|█████▍    | 13607/25000 [05:16<05:08, 36.96it/s]

Ep 13600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 55%|█████▍    | 13707/25000 [05:19<04:06, 45.74it/s]

Ep 13700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 55%|█████▌    | 13807/25000 [05:21<04:02, 46.17it/s]

Ep 13800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 56%|█████▌    | 13907/25000 [05:23<03:57, 46.69it/s]

Ep 13900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 56%|█████▌    | 14007/25000 [05:25<03:58, 46.04it/s]

Ep 14000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 56%|█████▋    | 14107/25000 [05:27<04:01, 45.19it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 57%|█████▋    | 14207/25000 [05:30<04:49, 37.29it/s]

Ep 14200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 57%|█████▋    | 14306/25000 [05:33<04:02, 44.09it/s]

Ep 14300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 58%|█████▊    | 14406/25000 [05:35<03:43, 47.50it/s]

Ep 14400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 58%|█████▊    | 14506/25000 [05:37<03:41, 47.42it/s]

Ep 14500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 58%|█████▊    | 14606/25000 [05:39<03:42, 46.73it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 59%|█████▉    | 14706/25000 [05:41<03:51, 44.49it/s]

Ep 14700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 59%|█████▉    | 14806/25000 [05:44<04:36, 36.85it/s]

Ep 14800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 60%|█████▉    | 14909/25000 [05:47<03:46, 44.52it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 60%|██████    | 15009/25000 [05:49<03:32, 46.99it/s]

Ep 15000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 60%|██████    | 15109/25000 [05:51<03:30, 46.89it/s]

Ep 15100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 61%|██████    | 15209/25000 [05:53<03:31, 46.31it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 61%|██████    | 15309/25000 [05:55<03:31, 45.86it/s]

Ep 15300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 62%|██████▏   | 15405/25000 [05:58<04:29, 35.56it/s]

Ep 15400/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 6


 62%|██████▏   | 15509/25000 [06:01<03:27, 45.79it/s]

Ep 15500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 62%|██████▏   | 15609/25000 [06:03<03:16, 47.73it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 6


 63%|██████▎   | 15709/25000 [06:05<03:16, 47.22it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 63%|██████▎   | 15809/25000 [06:07<03:20, 45.94it/s]

Ep 15800/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 64%|██████▎   | 15904/25000 [06:09<03:22, 44.85it/s]

Ep 15900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 64%|██████▍   | 16006/25000 [06:12<03:57, 37.83it/s]

Ep 16000/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 64%|██████▍   | 16108/25000 [06:15<03:21, 44.06it/s]

Ep 16100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 65%|██████▍   | 16208/25000 [06:17<03:13, 45.32it/s]

Ep 16200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 65%|██████▌   | 16308/25000 [06:19<03:06, 46.66it/s]

Ep 16300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 66%|██████▌   | 16408/25000 [06:21<03:02, 47.13it/s]

Ep 16400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 66%|██████▌   | 16508/25000 [06:23<03:06, 45.57it/s]

Ep 16500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 66%|██████▋   | 16605/25000 [06:26<03:46, 37.06it/s]

Ep 16600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 67%|██████▋   | 16706/25000 [06:28<03:09, 43.72it/s]

Ep 16700/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 6


 67%|██████▋   | 16806/25000 [06:31<02:55, 46.59it/s]

Ep 16800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 68%|██████▊   | 16906/25000 [06:33<02:53, 46.58it/s]

Ep 16900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 68%|██████▊   | 17006/25000 [06:35<02:49, 47.06it/s]

Ep 17000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 68%|██████▊   | 17106/25000 [06:37<02:46, 47.28it/s]

Ep 17100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 69%|██████▉   | 17205/25000 [06:40<03:27, 37.64it/s]

Ep 17200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 69%|██████▉   | 17306/25000 [06:42<03:00, 42.55it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 70%|██████▉   | 17406/25000 [06:45<02:41, 47.05it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 70%|███████   | 17506/25000 [06:47<02:44, 45.44it/s]

Ep 17500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 6


 70%|███████   | 17606/25000 [06:49<02:39, 46.48it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 71%|███████   | 17706/25000 [06:51<02:38, 46.14it/s]

Ep 17700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 71%|███████   | 17805/25000 [06:54<03:23, 35.39it/s]

Ep 17800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 72%|███████▏  | 17906/25000 [06:56<02:42, 43.68it/s]

Ep 17900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 72%|███████▏  | 18006/25000 [06:59<02:28, 47.09it/s]

Ep 18000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 6


 72%|███████▏  | 18106/25000 [07:01<02:26, 46.95it/s]

Ep 18100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 73%|███████▎  | 18206/25000 [07:03<02:22, 47.71it/s]

Ep 18200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 73%|███████▎  | 18306/25000 [07:05<02:22, 47.13it/s]

Ep 18300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 74%|███████▎  | 18405/25000 [07:08<03:00, 36.61it/s]

Ep 18400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 74%|███████▍  | 18508/25000 [07:10<02:34, 42.14it/s]

Ep 18500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 74%|███████▍  | 18608/25000 [07:13<02:18, 46.01it/s]

Ep 18600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 75%|███████▍  | 18708/25000 [07:15<02:17, 45.65it/s]

Ep 18700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 6


 75%|███████▌  | 18808/25000 [07:17<02:15, 45.75it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 76%|███████▌  | 18908/25000 [07:19<02:11, 46.50it/s]

Ep 18900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 76%|███████▌  | 19005/25000 [07:21<02:42, 36.90it/s]

Ep 19000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 76%|███████▋  | 19105/25000 [07:24<02:58, 33.08it/s]

Ep 19100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 77%|███████▋  | 19205/25000 [07:26<02:06, 45.70it/s]

Ep 19200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 77%|███████▋  | 19305/25000 [07:29<02:01, 46.71it/s]

Ep 19300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 78%|███████▊  | 19405/25000 [07:31<02:00, 46.49it/s]

Ep 19400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 78%|███████▊  | 19505/25000 [07:33<01:57, 46.94it/s]

Ep 19500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 78%|███████▊  | 19607/25000 [07:35<02:25, 37.03it/s]

Ep 19600/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 6


 79%|███████▉  | 19705/25000 [07:38<02:26, 36.25it/s]

Ep 19700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 79%|███████▉  | 19805/25000 [07:40<01:52, 46.13it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 80%|███████▉  | 19905/25000 [07:42<01:49, 46.44it/s]

Ep 19900/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 6


 80%|████████  | 20005/25000 [07:45<01:46, 46.87it/s]

Ep 20000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 80%|████████  | 20105/25000 [07:47<01:47, 45.56it/s]

Ep 20100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 81%|████████  | 20207/25000 [07:49<02:05, 38.14it/s]

Ep 20200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 81%|████████  | 20304/25000 [07:52<02:22, 32.94it/s]

Ep 20300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 82%|████████▏ | 20408/25000 [07:54<01:40, 45.48it/s]

Ep 20400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 82%|████████▏ | 20508/25000 [07:57<01:36, 46.68it/s]

Ep 20500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 82%|████████▏ | 20608/25000 [07:59<01:34, 46.54it/s]

Ep 20600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 83%|████████▎ | 20708/25000 [08:01<01:30, 47.44it/s]

Ep 20700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 83%|████████▎ | 20806/25000 [08:03<01:59, 34.96it/s]

Ep 20800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 84%|████████▎ | 20906/25000 [08:06<01:53, 36.06it/s]

Ep 20900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 84%|████████▍ | 21009/25000 [08:08<01:25, 46.90it/s]

Ep 21000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 84%|████████▍ | 21109/25000 [08:11<01:23, 46.52it/s]

Ep 21100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 6


 85%|████████▍ | 21209/25000 [08:13<01:21, 46.42it/s]

Ep 21200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 6


 85%|████████▌ | 21309/25000 [08:15<01:21, 45.24it/s]

Ep 21300/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 6


 86%|████████▌ | 21406/25000 [08:17<01:33, 38.48it/s]

Ep 21400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


 86%|████████▌ | 21504/25000 [08:20<01:44, 33.41it/s]

Ep 21500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 86%|████████▋ | 21605/25000 [08:22<01:12, 46.64it/s]

Ep 21600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 6


 87%|████████▋ | 21705/25000 [08:24<01:11, 45.83it/s]

Ep 21700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 87%|████████▋ | 21805/25000 [08:27<01:09, 45.96it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 88%|████████▊ | 21905/25000 [08:29<01:06, 46.34it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 88%|████████▊ | 22007/25000 [08:31<01:18, 37.95it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 88%|████████▊ | 22105/25000 [08:34<01:26, 33.33it/s]

Ep 22100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 89%|████████▉ | 22207/25000 [08:36<01:01, 45.45it/s]

Ep 22200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 89%|████████▉ | 22307/25000 [08:38<00:58, 46.08it/s]

Ep 22300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 90%|████████▉ | 22407/25000 [08:41<00:55, 46.65it/s]

Ep 22400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 90%|█████████ | 22507/25000 [08:43<00:52, 47.08it/s]

Ep 22500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 90%|█████████ | 22607/25000 [08:45<00:59, 40.03it/s]

Ep 22600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 91%|█████████ | 22704/25000 [08:48<01:02, 36.84it/s]

Ep 22700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 91%|█████████ | 22805/25000 [08:50<00:47, 46.58it/s]

Ep 22800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 6


 92%|█████████▏| 22905/25000 [08:52<00:46, 45.45it/s]

Ep 22900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 92%|█████████▏| 23005/25000 [08:55<00:43, 45.72it/s]

Ep 23000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 92%|█████████▏| 23105/25000 [08:57<00:41, 46.05it/s]

Ep 23100/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 6


 93%|█████████▎| 23205/25000 [08:59<00:38, 47.18it/s]

Ep 23200/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 93%|█████████▎| 23305/25000 [09:01<00:44, 37.89it/s]

Ep 23300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 94%|█████████▎| 23405/25000 [09:04<00:35, 45.56it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 6


 94%|█████████▍| 23505/25000 [09:06<00:32, 45.44it/s]

Ep 23500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 94%|█████████▍| 23605/25000 [09:08<00:30, 46.19it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 95%|█████████▍| 23705/25000 [09:11<00:27, 46.37it/s]

Ep 23700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 95%|█████████▌| 23805/25000 [09:13<00:25, 46.93it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 96%|█████████▌| 23904/25000 [09:15<00:31, 35.24it/s]

Ep 23900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 96%|█████████▌| 24007/25000 [09:18<00:21, 45.55it/s]

Ep 24000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 96%|█████████▋| 24107/25000 [09:20<00:19, 45.39it/s]

Ep 24100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 97%|█████████▋| 24207/25000 [09:23<00:17, 44.32it/s]

Ep 24200/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 6


 97%|█████████▋| 24307/25000 [09:25<00:15, 45.02it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 98%|█████████▊| 24407/25000 [09:27<00:12, 46.51it/s]

Ep 24400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 98%|█████████▊| 24505/25000 [09:30<00:13, 37.21it/s]

Ep 24500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 98%|█████████▊| 24609/25000 [09:32<00:08, 45.67it/s]

Ep 24600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 99%|█████████▉| 24709/25000 [09:34<00:06, 45.07it/s]

Ep 24700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 99%|█████████▉| 24809/25000 [09:37<00:04, 46.09it/s]

Ep 24800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


100%|█████████▉| 24909/25000 [09:39<00:01, 46.33it/s]

Ep 24900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


100%|██████████| 25000/25000 [09:41<00:00, 43.01it/s]


Ep 25000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6

TEST:


 38%|███▊      | 113/300 [00:00<00:01, 120.45it/s]

Ep 100/300, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.2, First Action 6


 71%|███████▏  | 214/300 [00:01<00:00, 114.52it/s]

Ep 200/300, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 6


100%|██████████| 300/300 [00:02<00:00, 118.22it/s]


Ep 300/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6

GAMMA 0.9 - LR 0.001 - Entropy Decay False


  0%|          | 108/25000 [00:02<09:15, 44.81it/s]

Ep 100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


  1%|          | 208/25000 [00:05<08:54, 46.40it/s]

Ep 200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


  1%|          | 308/25000 [00:07<09:19, 44.17it/s]

Ep 300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 5


  2%|▏         | 408/25000 [00:09<08:46, 46.71it/s]

Ep 400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


  2%|▏         | 508/25000 [00:11<08:44, 46.74it/s]

Ep 500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


  2%|▏         | 604/25000 [00:14<11:06, 36.60it/s]

Ep 600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 2


  3%|▎         | 705/25000 [00:16<09:04, 44.63it/s]

Ep 700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  3%|▎         | 805/25000 [00:18<08:40, 46.47it/s]

Ep 800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


  4%|▎         | 905/25000 [00:21<08:35, 46.76it/s]

Ep 900/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


  4%|▍         | 1005/25000 [00:23<08:30, 46.96it/s]

Ep 1000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 2


  4%|▍         | 1105/25000 [00:25<08:28, 46.96it/s]

Ep 1100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 2


  5%|▍         | 1206/25000 [00:28<10:45, 36.88it/s]

Ep 1200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


  5%|▌         | 1305/25000 [00:30<08:40, 45.53it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  6%|▌         | 1405/25000 [00:32<08:30, 46.20it/s]

Ep 1400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


  6%|▌         | 1505/25000 [00:35<08:34, 45.67it/s]

Ep 1500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 9


  6%|▋         | 1605/25000 [00:37<08:23, 46.43it/s]

Ep 1600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 3


  7%|▋         | 1705/25000 [00:39<08:17, 46.80it/s]

Ep 1700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


  7%|▋         | 1805/25000 [00:41<10:46, 35.87it/s]

Ep 1800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 9


  8%|▊         | 1909/25000 [00:44<08:39, 44.48it/s]

Ep 1900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


  8%|▊         | 2009/25000 [00:46<08:14, 46.53it/s]

Ep 2000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


  8%|▊         | 2109/25000 [00:49<08:13, 46.43it/s]

Ep 2100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.7, First Action 9


  9%|▉         | 2209/25000 [00:51<08:03, 47.18it/s]

Ep 2200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 8


  9%|▉         | 2309/25000 [00:53<07:57, 47.54it/s]

Ep 2300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 10%|▉         | 2403/25000 [00:55<10:27, 36.01it/s]

Ep 2400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 10%|█         | 2506/25000 [00:58<08:59, 41.69it/s]

Ep 2500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 10%|█         | 2606/25000 [01:00<08:08, 45.81it/s]

Ep 2600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 11%|█         | 2706/25000 [01:02<07:57, 46.67it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 8


 11%|█         | 2806/25000 [01:05<07:52, 47.00it/s]

Ep 2800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 9


 12%|█▏        | 2906/25000 [01:07<08:04, 45.57it/s]

Ep 2900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 12%|█▏        | 3006/25000 [01:09<10:00, 36.65it/s]

Ep 3000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 12%|█▏        | 3109/25000 [01:12<09:31, 38.32it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 13%|█▎        | 3205/25000 [01:14<07:54, 45.89it/s]

Ep 3200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


 13%|█▎        | 3305/25000 [01:16<07:53, 45.82it/s]

Ep 3300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 9


 14%|█▎        | 3405/25000 [01:18<07:55, 45.39it/s]

Ep 3400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 9


 14%|█▍        | 3505/25000 [01:21<07:36, 47.05it/s]

Ep 3500/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.9, First Action 9


 14%|█▍        | 3607/25000 [01:23<10:03, 35.46it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 15%|█▍        | 3706/25000 [01:26<10:27, 33.94it/s]

Ep 3700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 9


 15%|█▌        | 3805/25000 [01:28<07:32, 46.81it/s]

Ep 3800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 9


 16%|█▌        | 3905/25000 [01:30<07:40, 45.86it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


 16%|█▌        | 4005/25000 [01:32<07:33, 46.33it/s]

Ep 4000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 9


 16%|█▋        | 4105/25000 [01:35<07:25, 46.85it/s]

Ep 4100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 9


 17%|█▋        | 4205/25000 [01:37<09:08, 37.90it/s]

Ep 4200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.8, First Action 9


 17%|█▋        | 4305/25000 [01:40<09:57, 34.63it/s]

Ep 4300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 9


 18%|█▊        | 4405/25000 [01:42<07:19, 46.89it/s]

Ep 4400/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 9


 18%|█▊        | 4505/25000 [01:44<07:35, 44.95it/s]

Ep 4500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 9


 18%|█▊        | 4605/25000 [01:46<07:25, 45.82it/s]

Ep 4600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 9


 19%|█▉        | 4705/25000 [01:48<07:22, 45.83it/s]

Ep 4700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 9


 19%|█▉        | 4803/25000 [01:51<09:03, 37.17it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 20%|█▉        | 4905/25000 [01:53<09:35, 34.95it/s]

Ep 4900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 9


 20%|██        | 5006/25000 [01:56<07:22, 45.22it/s]

Ep 5000/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.6, First Action 9


 20%|██        | 5106/25000 [01:58<07:13, 45.93it/s]

Ep 5100/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.4, First Action 9


 21%|██        | 5206/25000 [02:00<07:01, 46.95it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.4, First Action 9


 21%|██        | 5306/25000 [02:02<07:13, 45.47it/s]

Ep 5300/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.0, First Action 9


 22%|██▏       | 5406/25000 [02:05<08:02, 40.58it/s]

Ep 5400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.3, First Action 9


 22%|██▏       | 5503/25000 [02:07<09:36, 33.84it/s]

Ep 5500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 9


 22%|██▏       | 5609/25000 [02:10<06:56, 46.59it/s]

Ep 5600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 9


 23%|██▎       | 5709/25000 [02:12<07:06, 45.19it/s]

Ep 5700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 9


 23%|██▎       | 5809/25000 [02:14<06:58, 45.88it/s]

Ep 5800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.9, First Action 9


 24%|██▎       | 5909/25000 [02:17<06:48, 46.76it/s]

Ep 5900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 9


 24%|██▍       | 6004/25000 [02:19<07:34, 41.76it/s]

Ep 6000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 9


 24%|██▍       | 6106/25000 [02:21<09:08, 34.43it/s]

Ep 6100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 25%|██▍       | 6205/25000 [02:24<06:45, 46.31it/s]

Ep 6200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 9


 25%|██▌       | 6305/25000 [02:26<06:49, 45.60it/s]

Ep 6300/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.9, First Action 9


 26%|██▌       | 6405/25000 [02:28<06:48, 45.51it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 9


 26%|██▌       | 6505/25000 [02:30<06:40, 46.12it/s]

Ep 6500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.5, First Action 9


 26%|██▋       | 6605/25000 [02:33<06:29, 47.19it/s]

Ep 6600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 9


 27%|██▋       | 6706/25000 [02:35<08:08, 37.48it/s]

Ep 6700/25000, Opt. Action: 9, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 9


 27%|██▋       | 6809/25000 [02:38<06:32, 46.30it/s]

Ep 6800/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.6, First Action 9


 28%|██▊       | 6909/25000 [02:40<06:31, 46.24it/s]

Ep 6900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 9


 28%|██▊       | 7004/25000 [02:42<06:41, 44.82it/s]

Ep 7000/25000, Opt. Action: 7, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 9


 28%|██▊       | 7109/25000 [02:45<06:30, 45.78it/s]

Ep 7100/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 9


 29%|██▉       | 7209/25000 [02:47<06:19, 46.88it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 9


 29%|██▉       | 7305/25000 [02:49<08:22, 35.24it/s]

Ep 7300/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 9


 30%|██▉       | 7406/25000 [02:52<06:21, 46.07it/s]

Ep 7400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 9


 30%|███       | 7506/25000 [02:54<06:27, 45.18it/s]

Ep 7500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 9


 30%|███       | 7606/25000 [02:56<06:18, 45.97it/s]

Ep 7600/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.8, First Action 9


 31%|███       | 7706/25000 [02:59<06:13, 46.35it/s]

Ep 7700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 9


 31%|███       | 7806/25000 [03:01<06:07, 46.80it/s]

Ep 7800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 9


 32%|███▏      | 7904/25000 [03:03<07:48, 36.49it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.6, First Action 9


 32%|███▏      | 8006/25000 [03:06<06:17, 45.01it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 9


 32%|███▏      | 8106/25000 [03:08<06:08, 45.80it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 9


 33%|███▎      | 8206/25000 [03:10<06:10, 45.29it/s]

Ep 8200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 9


 33%|███▎      | 8306/25000 [03:13<06:01, 46.12it/s]

Ep 8300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 34%|███▎      | 8406/25000 [03:15<05:56, 46.52it/s]

Ep 8400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 9


 34%|███▍      | 8504/25000 [03:17<07:45, 35.47it/s]

Ep 8500/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.6, First Action 9


 34%|███▍      | 8609/25000 [03:20<06:10, 44.25it/s]

Ep 8600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 9


 35%|███▍      | 8709/25000 [03:22<05:52, 46.21it/s]

Ep 8700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.7, First Action 9


 35%|███▌      | 8809/25000 [03:24<05:51, 46.01it/s]

Ep 8800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.2, First Action 9


 36%|███▌      | 8909/25000 [03:27<05:43, 46.82it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 9


 36%|███▌      | 9009/25000 [03:29<05:40, 46.96it/s]

Ep 9000/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.8, First Action 9


 36%|███▋      | 9103/25000 [03:31<07:39, 34.57it/s]

Ep 9100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 37%|███▋      | 9208/25000 [03:34<06:19, 41.60it/s]

Ep 9200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 9


 37%|███▋      | 9308/25000 [03:36<05:40, 46.09it/s]

Ep 9300/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.9, First Action 9


 38%|███▊      | 9408/25000 [03:38<05:36, 46.28it/s]

Ep 9400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 38%|███▊      | 9508/25000 [03:41<05:38, 45.71it/s]

Ep 9500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 9


 38%|███▊      | 9608/25000 [03:43<05:32, 46.28it/s]

Ep 9600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 9


 39%|███▉      | 9704/25000 [03:45<06:42, 37.96it/s]

Ep 9700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.1, First Action 9


 39%|███▉      | 9804/25000 [03:48<07:06, 35.66it/s]

Ep 9800/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 9


 40%|███▉      | 9909/25000 [03:50<05:27, 46.08it/s]

Ep 9900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 40%|████      | 10009/25000 [03:52<05:23, 46.39it/s]

Ep 10000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 9


 40%|████      | 10109/25000 [03:55<05:23, 46.08it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.0, First Action 9


 41%|████      | 10209/25000 [03:57<05:14, 46.97it/s]

Ep 10200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.4, First Action 9


 41%|████      | 10306/25000 [03:59<06:57, 35.21it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 9


 42%|████▏     | 10407/25000 [04:02<06:59, 34.78it/s]

Ep 10400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 9


 42%|████▏     | 10509/25000 [04:04<05:12, 46.32it/s]

Ep 10500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 42%|████▏     | 10609/25000 [04:06<05:16, 45.52it/s]

Ep 10600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 9


 43%|████▎     | 10709/25000 [04:09<05:12, 45.70it/s]

Ep 10700/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.8, First Action 9


 43%|████▎     | 10809/25000 [04:11<05:07, 46.19it/s]

Ep 10800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 9


 44%|████▎     | 10905/25000 [04:13<06:38, 35.36it/s]

Ep 10900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.7, First Action 9


 44%|████▍     | 11007/25000 [04:16<06:37, 35.19it/s]

Ep 11000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 9


 44%|████▍     | 11108/25000 [04:18<05:09, 44.87it/s]

Ep 11100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 45%|████▍     | 11208/25000 [04:20<04:56, 46.48it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 16.9, First Action 9


 45%|████▌     | 11308/25000 [04:23<04:55, 46.39it/s]

Ep 11300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.8, First Action 9


 46%|████▌     | 11408/25000 [04:25<04:55, 45.96it/s]

Ep 11400/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.3, First Action 9


 46%|████▌     | 11506/25000 [04:27<06:11, 36.31it/s]

Ep 11500/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.1, First Action 9


 46%|████▋     | 11603/25000 [04:30<06:22, 35.05it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 9


 47%|████▋     | 11707/25000 [04:32<04:46, 46.33it/s]

Ep 11700/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.4, First Action 9


 47%|████▋     | 11807/25000 [04:34<04:42, 46.72it/s]

Ep 11800/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 9


 48%|████▊     | 11907/25000 [04:37<04:45, 45.81it/s]

Ep 11900/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.3, First Action 9


 48%|████▊     | 12007/25000 [04:39<04:46, 45.40it/s]

Ep 12000/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 9


 48%|████▊     | 12105/25000 [04:41<05:34, 38.55it/s]

Ep 12100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 9


 49%|████▉     | 12206/25000 [04:44<06:00, 35.47it/s]

Ep 12200/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.3, First Action 9


 49%|████▉     | 12307/25000 [04:46<04:39, 45.49it/s]

Ep 12300/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.6, First Action 9


 50%|████▉     | 12407/25000 [04:48<04:39, 45.10it/s]

Ep 12400/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 9


 50%|█████     | 12507/25000 [04:51<04:33, 45.72it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 9


 50%|█████     | 12607/25000 [04:53<04:28, 46.17it/s]

Ep 12600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 51%|█████     | 12702/25000 [04:55<05:06, 40.07it/s]

Ep 12700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 9


 51%|█████     | 12807/25000 [04:58<06:03, 33.54it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 9


 52%|█████▏    | 12905/25000 [05:00<04:18, 46.86it/s]

Ep 12900/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.2, First Action 9


 52%|█████▏    | 13005/25000 [05:02<04:18, 46.42it/s]

Ep 13000/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 9


 52%|█████▏    | 13105/25000 [05:05<04:24, 44.92it/s]

Ep 13100/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.7, First Action 9


 53%|█████▎    | 13205/25000 [05:07<04:14, 46.36it/s]

Ep 13200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 9


 53%|█████▎    | 13305/25000 [05:09<04:48, 40.53it/s]

Ep 13300/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.3, First Action 9


 54%|█████▎    | 13406/25000 [05:12<05:35, 34.54it/s]

Ep 13400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 54%|█████▍    | 13507/25000 [05:14<04:10, 45.90it/s]

Ep 13500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 9


 54%|█████▍    | 13607/25000 [05:17<04:06, 46.22it/s]

Ep 13600/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.7, First Action 9


 55%|█████▍    | 13707/25000 [05:19<04:02, 46.56it/s]

Ep 13700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.9, First Action 9


 55%|█████▌    | 13807/25000 [05:21<04:00, 46.48it/s]

Ep 13800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 56%|█████▌    | 13907/25000 [05:23<04:26, 41.62it/s]

Ep 13900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 56%|█████▌    | 14006/25000 [05:26<05:11, 35.33it/s]

Ep 14000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 9


 56%|█████▋    | 14106/25000 [05:28<03:58, 45.67it/s]

Ep 14100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 9


 57%|█████▋    | 14206/25000 [05:31<03:53, 46.13it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 9


 57%|█████▋    | 14306/25000 [05:33<03:56, 45.18it/s]

Ep 14300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.7, First Action 9


 58%|█████▊    | 14406/25000 [05:35<03:49, 46.14it/s]

Ep 14400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 58%|█████▊    | 14506/25000 [05:37<03:57, 44.26it/s]

Ep 14500/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.4, First Action 9


 58%|█████▊    | 14607/25000 [05:40<04:36, 37.57it/s]

Ep 14600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 59%|█████▉    | 14709/25000 [05:42<03:43, 45.96it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.3, First Action 9


 59%|█████▉    | 14809/25000 [05:45<03:39, 46.49it/s]

Ep 14800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.3, First Action 9


 60%|█████▉    | 14909/25000 [05:47<03:36, 46.69it/s]

Ep 14900/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 12.8, First Action 9


 60%|██████    | 15009/25000 [05:49<03:32, 47.07it/s]

Ep 15000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 9


 60%|██████    | 15109/25000 [05:51<03:35, 45.90it/s]

Ep 15100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.6, First Action 9


 61%|██████    | 15204/25000 [05:54<04:23, 37.25it/s]

Ep 15200/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.2, First Action 9


 61%|██████    | 15306/25000 [05:56<03:32, 45.62it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 9


 62%|██████▏   | 15406/25000 [05:59<03:25, 46.69it/s]

Ep 15400/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 9


 62%|██████▏   | 15506/25000 [06:01<03:26, 46.01it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 9


 62%|██████▏   | 15606/25000 [06:03<03:23, 46.27it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 9


 63%|██████▎   | 15706/25000 [06:05<03:25, 45.28it/s]

Ep 15700/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 9


 63%|██████▎   | 15805/25000 [06:08<04:11, 36.61it/s]

Ep 15800/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.0, First Action 9


 64%|██████▎   | 15909/25000 [06:10<03:22, 44.93it/s]

Ep 15900/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.8, First Action 9


 64%|██████▍   | 16009/25000 [06:13<03:14, 46.23it/s]

Ep 16000/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.5, First Action 9


 64%|██████▍   | 16109/25000 [06:15<03:10, 46.57it/s]

Ep 16100/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.4, First Action 9


 65%|██████▍   | 16209/25000 [06:17<03:07, 46.87it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.1, First Action 9


 65%|██████▌   | 16309/25000 [06:19<03:08, 46.15it/s]

Ep 16300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 66%|██████▌   | 16406/25000 [06:22<03:59, 35.89it/s]

Ep 16400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 9


 66%|██████▌   | 16507/25000 [06:24<03:12, 44.07it/s]

Ep 16500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 9


 66%|██████▋   | 16607/25000 [06:27<03:04, 45.44it/s]

Ep 16600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 9


 67%|██████▋   | 16707/25000 [06:29<02:57, 46.83it/s]

Ep 16700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 9


 67%|██████▋   | 16807/25000 [06:31<03:04, 44.40it/s]

Ep 16800/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.2, First Action 9


 68%|██████▊   | 16907/25000 [06:33<02:58, 45.33it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 9


 68%|██████▊   | 17005/25000 [06:36<03:28, 38.27it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 9


 68%|██████▊   | 17109/25000 [06:39<03:07, 42.04it/s]

Ep 17100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.3, First Action 9


 69%|██████▉   | 17209/25000 [06:41<02:49, 46.00it/s]

Ep 17200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 9


 69%|██████▉   | 17309/25000 [06:43<02:45, 46.46it/s]

Ep 17300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 9


 70%|██████▉   | 17409/25000 [06:45<02:41, 47.01it/s]

Ep 17400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 9


 70%|███████   | 17509/25000 [06:47<02:43, 45.70it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.0, First Action 9


 70%|███████   | 17607/25000 [06:50<03:26, 35.88it/s]

Ep 17600/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 9


 71%|███████   | 17709/25000 [06:53<03:02, 39.92it/s]

Ep 17700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 9


 71%|███████   | 17809/25000 [06:55<02:32, 47.08it/s]

Ep 17800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 9


 72%|███████▏  | 17909/25000 [06:57<02:31, 46.92it/s]

Ep 17900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 72%|███████▏  | 18009/25000 [06:59<02:35, 44.99it/s]

Ep 18000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 72%|███████▏  | 18109/25000 [07:01<02:30, 45.81it/s]

Ep 18100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 9


 73%|███████▎  | 18205/25000 [07:04<02:53, 39.27it/s]

Ep 18200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 9


 73%|███████▎  | 18305/25000 [07:07<03:16, 34.04it/s]

Ep 18300/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.7, First Action 9


 74%|███████▎  | 18409/25000 [07:09<02:22, 46.12it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 12.9, First Action 9


 74%|███████▍  | 18509/25000 [07:11<02:21, 45.76it/s]

Ep 18500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 9


 74%|███████▍  | 18609/25000 [07:13<02:20, 45.37it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 9


 75%|███████▍  | 18709/25000 [07:15<02:14, 46.67it/s]

Ep 18700/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.9, First Action 9


 75%|███████▌  | 18807/25000 [07:18<02:45, 37.47it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.0, First Action 9


 76%|███████▌  | 18906/25000 [07:20<02:58, 34.12it/s]

Ep 18900/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.7, First Action 9


 76%|███████▌  | 19006/25000 [07:23<02:08, 46.77it/s]

Ep 19000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 9


 76%|███████▋  | 19106/25000 [07:25<02:04, 47.33it/s]

Ep 19100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.4, First Action 9


 77%|███████▋  | 19206/25000 [07:27<02:10, 44.52it/s]

Ep 19200/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.7, First Action 9


 77%|███████▋  | 19306/25000 [07:29<02:06, 44.90it/s]

Ep 19300/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 9


 78%|███████▊  | 19404/25000 [07:32<02:32, 36.58it/s]

Ep 19400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 9


 78%|███████▊  | 19505/25000 [07:34<02:44, 33.48it/s]

Ep 19500/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.8, First Action 9


 78%|███████▊  | 19608/25000 [07:37<01:57, 46.07it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 9


 79%|███████▉  | 19708/25000 [07:39<01:53, 46.65it/s]

Ep 19700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 79%|███████▉  | 19808/25000 [07:41<01:53, 45.67it/s]

Ep 19800/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 9


 80%|███████▉  | 19908/25000 [07:43<01:50, 45.92it/s]

Ep 19900/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.0, First Action 9


 80%|████████  | 20003/25000 [07:45<02:04, 40.02it/s]

Ep 20000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 9


 80%|████████  | 20105/25000 [07:48<02:22, 34.30it/s]

Ep 20100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 9


 81%|████████  | 20205/25000 [07:51<01:42, 46.56it/s]

Ep 20200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 9


 81%|████████  | 20305/25000 [07:53<01:42, 45.74it/s]

Ep 20300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.5, First Action 9


 82%|████████▏ | 20405/25000 [07:55<01:41, 45.35it/s]

Ep 20400/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 9


 82%|████████▏ | 20505/25000 [07:57<01:37, 46.32it/s]

Ep 20500/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.9, First Action 9


 82%|████████▏ | 20605/25000 [08:00<01:41, 43.13it/s]

Ep 20600/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.0, First Action 9


 83%|████████▎ | 20703/25000 [08:02<01:55, 37.17it/s]

Ep 20700/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.0, First Action 9


 83%|████████▎ | 20809/25000 [08:05<01:30, 46.18it/s]

Ep 20800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 9


 84%|████████▎ | 20909/25000 [08:07<01:29, 45.71it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.1, First Action 9


 84%|████████▍ | 21009/25000 [08:09<01:27, 45.58it/s]

Ep 21000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 12.9, First Action 9


 84%|████████▍ | 21109/25000 [08:12<01:24, 45.86it/s]

Ep 21100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 9


 85%|████████▍ | 21209/25000 [08:14<01:21, 46.64it/s]

Ep 21200/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 9


 85%|████████▌ | 21306/25000 [08:16<01:42, 36.01it/s]

Ep 21300/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 9


 86%|████████▌ | 21407/25000 [08:19<01:18, 45.66it/s]

Ep 21400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.6, First Action 9


 86%|████████▌ | 21507/25000 [08:21<01:19, 43.94it/s]

Ep 21500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 9


 86%|████████▋ | 21607/25000 [08:23<01:15, 44.92it/s]

Ep 21600/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.9, First Action 9


 87%|████████▋ | 21707/25000 [08:26<01:11, 45.95it/s]

Ep 21700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 9


 87%|████████▋ | 21807/25000 [08:28<01:08, 46.60it/s]

Ep 21800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 9


 88%|████████▊ | 21907/25000 [08:31<01:25, 36.21it/s]

Ep 21900/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.2, First Action 9


 88%|████████▊ | 22009/25000 [08:33<01:06, 44.73it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.5, First Action 9


 88%|████████▊ | 22109/25000 [08:35<01:02, 46.44it/s]

Ep 22100/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.2, First Action 9


 89%|████████▉ | 22209/25000 [08:38<00:59, 47.02it/s]

Ep 22200/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 9


 89%|████████▉ | 22309/25000 [08:40<00:58, 45.88it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 9


 90%|████████▉ | 22409/25000 [08:42<00:56, 45.86it/s]

Ep 22400/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.1, First Action 9


 90%|█████████ | 22506/25000 [08:45<01:05, 38.28it/s]

Ep 22500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9


 90%|█████████ | 22604/25000 [08:47<00:54, 44.04it/s]

Ep 22600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 9


 91%|█████████ | 22709/25000 [08:49<00:48, 47.03it/s]

Ep 22700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.1, First Action 9


 91%|█████████ | 22809/25000 [08:52<00:47, 46.36it/s]

Ep 22800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.0, First Action 9


 92%|█████████▏| 22909/25000 [08:54<00:45, 46.44it/s]

Ep 22900/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.3, First Action 9


 92%|█████████▏| 23009/25000 [08:56<00:42, 46.42it/s]

Ep 23000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.4, First Action 9


 92%|█████████▏| 23107/25000 [08:59<00:49, 37.99it/s]

Ep 23100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 9


 93%|█████████▎| 23209/25000 [09:01<00:39, 45.04it/s]

Ep 23200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 9


 93%|█████████▎| 23309/25000 [09:04<00:37, 45.38it/s]

Ep 23300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 9


 94%|█████████▎| 23409/25000 [09:06<00:33, 47.27it/s]

Ep 23400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.9, First Action 9


 94%|█████████▍| 23509/25000 [09:08<00:32, 46.58it/s]

Ep 23500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 9


 94%|█████████▍| 23609/25000 [09:10<00:30, 44.99it/s]

Ep 23600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.1, First Action 9


 95%|█████████▍| 23703/25000 [09:12<00:35, 36.38it/s]

Ep 23700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 9


 95%|█████████▌| 23806/25000 [09:15<00:28, 42.51it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 9


 96%|█████████▌| 23906/25000 [09:17<00:24, 45.46it/s]

Ep 23900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 9


 96%|█████████▌| 24006/25000 [09:20<00:21, 46.34it/s]

Ep 24000/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 9


 96%|█████████▋| 24106/25000 [09:22<00:19, 46.55it/s]

Ep 24100/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.1, First Action 9


 97%|█████████▋| 24206/25000 [09:24<00:17, 46.32it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 9


 97%|█████████▋| 24304/25000 [09:26<00:18, 37.88it/s]

Ep 24300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.5, First Action 9


 98%|█████████▊| 24404/25000 [09:29<00:16, 35.72it/s]

Ep 24400/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 9


 98%|█████████▊| 24509/25000 [09:32<00:10, 45.86it/s]

Ep 24500/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 9


 98%|█████████▊| 24609/25000 [09:34<00:08, 47.26it/s]

Ep 24600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 9


 99%|█████████▉| 24709/25000 [09:36<00:06, 45.86it/s]

Ep 24700/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.0, First Action 9


 99%|█████████▉| 24804/25000 [09:38<00:04, 45.50it/s]

Ep 24800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 9


100%|█████████▉| 24906/25000 [09:41<00:02, 37.99it/s]

Ep 24900/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.0, First Action 9


100%|██████████| 25000/25000 [09:43<00:00, 42.83it/s]


Ep 25000/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.9, First Action 9

TEST:


 41%|████      | 122/300 [00:00<00:01, 144.87it/s]

Ep 100/300, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 9


 76%|███████▌  | 228/300 [00:01<00:00, 147.16it/s]

Ep 200/300, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.1, First Action 9


100%|██████████| 300/300 [00:02<00:00, 144.42it/s]


Ep 300/300, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.6, First Action 9

GAMMA 1 - LR 0.001 - Entropy Decay False


  0%|          | 109/25000 [00:02<08:48, 47.07it/s]

Ep 100/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.1, First Action 10


  1%|          | 209/25000 [00:04<09:05, 45.42it/s]

Ep 200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  1%|          | 309/25000 [00:06<09:00, 45.68it/s]

Ep 300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


  2%|▏         | 403/25000 [00:09<11:17, 36.29it/s]

Ep 400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


  2%|▏         | 506/25000 [00:11<12:02, 33.89it/s]

Ep 500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  2%|▏         | 609/25000 [00:14<08:46, 46.36it/s]

Ep 600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  3%|▎         | 709/25000 [00:16<08:36, 47.04it/s]

Ep 700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  3%|▎         | 809/25000 [00:18<08:57, 45.00it/s]

Ep 800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


  4%|▎         | 909/25000 [00:20<08:45, 45.84it/s]

Ep 900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


  4%|▍         | 1003/25000 [00:23<10:52, 36.75it/s]

Ep 1000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


  4%|▍         | 1106/25000 [00:26<11:53, 33.49it/s]

Ep 1100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


  5%|▍         | 1207/25000 [00:28<08:28, 46.77it/s]

Ep 1200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  5%|▌         | 1307/25000 [00:30<08:42, 45.37it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 1


  6%|▌         | 1407/25000 [00:32<08:35, 45.75it/s]

Ep 1400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


  6%|▌         | 1507/25000 [00:34<08:20, 46.98it/s]

Ep 1500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  6%|▋         | 1605/25000 [00:37<10:09, 38.41it/s]

Ep 1600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  7%|▋         | 1707/25000 [00:39<11:11, 34.68it/s]

Ep 1700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  7%|▋         | 1807/25000 [00:42<08:07, 47.53it/s]

Ep 1800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 1


  8%|▊         | 1907/25000 [00:44<08:34, 44.92it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  8%|▊         | 2007/25000 [00:46<08:28, 45.21it/s]

Ep 2000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


  8%|▊         | 2107/25000 [00:48<08:22, 45.56it/s]

Ep 2100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  9%|▉         | 2206/25000 [00:51<09:51, 38.56it/s]

Ep 2200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  9%|▉         | 2304/25000 [00:53<11:02, 34.25it/s]

Ep 2300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 10%|▉         | 2409/25000 [00:56<08:08, 46.29it/s]

Ep 2400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 10%|█         | 2509/25000 [00:58<08:13, 45.58it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 10%|█         | 2609/25000 [01:00<08:04, 46.21it/s]

Ep 2600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 11%|█         | 2709/25000 [01:02<08:02, 46.19it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 11%|█         | 2804/25000 [01:05<08:35, 43.03it/s]

Ep 2800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 12%|█▏        | 2906/25000 [01:07<10:42, 34.37it/s]

Ep 2900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 12%|█▏        | 3006/25000 [01:10<07:52, 46.59it/s]

Ep 3000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 12%|█▏        | 3106/25000 [01:12<08:06, 44.99it/s]

Ep 3100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 13%|█▎        | 3206/25000 [01:14<07:53, 46.04it/s]

Ep 3200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 1


 13%|█▎        | 3306/25000 [01:17<07:55, 45.61it/s]

Ep 3300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 14%|█▎        | 3406/25000 [01:19<08:27, 42.53it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 14%|█▍        | 3505/25000 [01:22<10:41, 33.49it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 14%|█▍        | 3606/25000 [01:24<07:58, 44.68it/s]

Ep 3600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 15%|█▍        | 3706/25000 [01:26<07:43, 45.97it/s]

Ep 3700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 15%|█▌        | 3806/25000 [01:28<07:33, 46.73it/s]

Ep 3800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 1


 16%|█▌        | 3906/25000 [01:31<07:34, 46.42it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 16%|█▌        | 4006/25000 [01:33<08:23, 41.71it/s]

Ep 4000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 16%|█▋        | 4105/25000 [01:36<10:12, 34.12it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 17%|█▋        | 4209/25000 [01:38<07:39, 45.20it/s]

Ep 4200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 17%|█▋        | 4309/25000 [01:40<07:29, 46.08it/s]

Ep 4300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 18%|█▊        | 4409/25000 [01:43<07:25, 46.17it/s]

Ep 4400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 18%|█▊        | 4509/25000 [01:45<07:22, 46.35it/s]

Ep 4500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 18%|█▊        | 4604/25000 [01:47<07:18, 46.52it/s]

Ep 4600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 19%|█▉        | 4703/25000 [01:50<09:06, 37.17it/s]

Ep 4700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 19%|█▉        | 4806/25000 [01:52<07:25, 45.30it/s]

Ep 4800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 20%|█▉        | 4906/25000 [01:54<07:13, 46.32it/s]

Ep 4900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 20%|██        | 5006/25000 [01:57<07:08, 46.63it/s]

Ep 5000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 20%|██        | 5106/25000 [01:59<07:05, 46.78it/s]

Ep 5100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 21%|██        | 5206/25000 [02:01<07:00, 47.08it/s]

Ep 5200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 21%|██        | 5304/25000 [02:04<09:08, 35.93it/s]

Ep 5300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 22%|██▏       | 5405/25000 [02:06<07:09, 45.62it/s]

Ep 5400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 22%|██▏       | 5505/25000 [02:08<07:12, 45.03it/s]

Ep 5500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 22%|██▏       | 5605/25000 [02:11<07:09, 45.19it/s]

Ep 5600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 23%|██▎       | 5705/25000 [02:13<06:55, 46.42it/s]

Ep 5700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 1


 23%|██▎       | 5805/25000 [02:15<07:02, 45.45it/s]

Ep 5800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 24%|██▎       | 5904/25000 [02:18<08:32, 37.28it/s]

Ep 5900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 24%|██▍       | 6009/25000 [02:20<06:55, 45.76it/s]

Ep 6000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 24%|██▍       | 6109/25000 [02:23<06:52, 45.82it/s]

Ep 6100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 25%|██▍       | 6209/25000 [02:25<06:44, 46.51it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 25%|██▌       | 6309/25000 [02:27<06:55, 44.94it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 26%|██▌       | 6404/25000 [02:29<06:55, 44.78it/s]

Ep 6400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 26%|██▌       | 6506/25000 [02:32<08:08, 37.83it/s]

Ep 6500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 26%|██▋       | 6606/25000 [02:34<06:53, 44.44it/s]

Ep 6600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 27%|██▋       | 6706/25000 [02:37<06:35, 46.23it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 27%|██▋       | 6806/25000 [02:39<06:22, 47.53it/s]

Ep 6800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 28%|██▊       | 6906/25000 [02:41<06:27, 46.71it/s]

Ep 6900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 28%|██▊       | 7006/25000 [02:43<06:34, 45.63it/s]

Ep 7000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 28%|██▊       | 7105/25000 [02:46<08:31, 35.01it/s]

Ep 7100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 29%|██▉       | 7207/25000 [02:49<06:32, 45.28it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 29%|██▉       | 7307/25000 [02:51<06:17, 46.87it/s]

Ep 7300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 30%|██▉       | 7407/25000 [02:53<06:21, 46.10it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 30%|███       | 7507/25000 [02:55<06:29, 44.95it/s]

Ep 7500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 30%|███       | 7607/25000 [02:57<06:23, 45.41it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 31%|███       | 7704/25000 [03:00<07:50, 36.73it/s]

Ep 7700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 1


 31%|███       | 7806/25000 [03:03<06:33, 43.64it/s]

Ep 7800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 32%|███▏      | 7906/25000 [03:05<06:10, 46.13it/s]

Ep 7900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 32%|███▏      | 8006/25000 [03:07<06:19, 44.76it/s]

Ep 8000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 32%|███▏      | 8106/25000 [03:09<06:07, 46.01it/s]

Ep 8100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 33%|███▎      | 8206/25000 [03:11<06:05, 45.95it/s]

Ep 8200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 33%|███▎      | 8307/25000 [03:14<07:33, 36.82it/s]

Ep 8300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 34%|███▎      | 8408/25000 [03:17<06:44, 41.06it/s]

Ep 8400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 34%|███▍      | 8508/25000 [03:19<05:59, 45.90it/s]

Ep 8500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 34%|███▍      | 8608/25000 [03:21<06:01, 45.31it/s]

Ep 8600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 35%|███▍      | 8708/25000 [03:23<05:56, 45.75it/s]

Ep 8700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 35%|███▌      | 8808/25000 [03:26<05:50, 46.21it/s]

Ep 8800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 36%|███▌      | 8904/25000 [03:28<07:33, 35.48it/s]

Ep 8900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 36%|███▌      | 9007/25000 [03:31<06:06, 43.65it/s]

Ep 9000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 36%|███▋      | 9107/25000 [03:33<05:56, 44.55it/s]

Ep 9100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 37%|███▋      | 9207/25000 [03:35<05:44, 45.81it/s]

Ep 9200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 37%|███▋      | 9307/25000 [03:37<05:39, 46.27it/s]

Ep 9300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 38%|███▊      | 9407/25000 [03:40<05:32, 46.90it/s]

Ep 9400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 38%|███▊      | 9505/25000 [03:42<07:22, 35.03it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 38%|███▊      | 9608/25000 [03:45<06:19, 40.57it/s]

Ep 9600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 39%|███▉      | 9708/25000 [03:47<05:46, 44.08it/s]

Ep 9700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 39%|███▉      | 9808/25000 [03:49<05:31, 45.88it/s]

Ep 9800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 40%|███▉      | 9908/25000 [03:52<05:31, 45.56it/s]

Ep 9900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 40%|████      | 10008/25000 [03:54<05:25, 46.08it/s]

Ep 10000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 40%|████      | 10103/25000 [03:56<07:08, 34.78it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 41%|████      | 10207/25000 [03:59<06:07, 40.20it/s]

Ep 10200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 41%|████      | 10307/25000 [04:01<05:18, 46.08it/s]

Ep 10300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 42%|████▏     | 10407/25000 [04:03<05:15, 46.30it/s]

Ep 10400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 42%|████▏     | 10507/25000 [04:06<05:07, 47.13it/s]

Ep 10500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 42%|████▏     | 10607/25000 [04:08<05:06, 46.96it/s]

Ep 10600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 43%|████▎     | 10706/25000 [04:10<06:22, 37.36it/s]

Ep 10700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 43%|████▎     | 10803/25000 [04:13<07:09, 33.06it/s]

Ep 10800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 1


 44%|████▎     | 10907/25000 [04:15<05:11, 45.25it/s]

Ep 10900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 44%|████▍     | 11007/25000 [04:18<05:04, 46.01it/s]

Ep 11000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 44%|████▍     | 11107/25000 [04:20<05:01, 46.13it/s]

Ep 11100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 45%|████▍     | 11207/25000 [04:22<04:53, 47.00it/s]

Ep 11200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 45%|████▌     | 11305/25000 [04:24<05:59, 38.05it/s]

Ep 11300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 46%|████▌     | 11405/25000 [04:27<06:49, 33.18it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 46%|████▌     | 11509/25000 [04:30<04:51, 46.25it/s]

Ep 11500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 46%|████▋     | 11609/25000 [04:32<04:45, 46.85it/s]

Ep 11600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 47%|████▋     | 11709/25000 [04:34<04:44, 46.71it/s]

Ep 11700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 47%|████▋     | 11804/25000 [04:36<04:41, 46.90it/s]

Ep 11800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 48%|████▊     | 11906/25000 [04:38<06:19, 34.50it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 48%|████▊     | 12003/25000 [04:41<06:40, 32.42it/s]

Ep 12000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 48%|████▊     | 12106/25000 [04:44<04:43, 45.46it/s]

Ep 12100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 49%|████▉     | 12206/25000 [04:46<04:41, 45.48it/s]

Ep 12200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 49%|████▉     | 12306/25000 [04:48<04:34, 46.30it/s]

Ep 12300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 50%|████▉     | 12406/25000 [04:50<04:35, 45.72it/s]

Ep 12400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 50%|█████     | 12505/25000 [04:53<05:47, 35.95it/s]

Ep 12500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 50%|█████     | 12606/25000 [04:55<06:06, 33.81it/s]

Ep 12600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 1


 51%|█████     | 12705/25000 [04:58<04:22, 46.75it/s]

Ep 12700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 51%|█████     | 12805/25000 [05:00<04:20, 46.81it/s]

Ep 12800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 52%|█████▏    | 12905/25000 [05:02<04:23, 45.84it/s]

Ep 12900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 1


 52%|█████▏    | 13005/25000 [05:04<04:27, 44.92it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 52%|█████▏    | 13106/25000 [05:07<05:19, 37.19it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 1


 53%|█████▎    | 13203/25000 [05:09<05:48, 33.86it/s]

Ep 13200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 53%|█████▎    | 13306/25000 [05:12<04:10, 46.65it/s]

Ep 13300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 54%|█████▎    | 13406/25000 [05:14<04:12, 45.90it/s]

Ep 13400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 54%|█████▍    | 13506/25000 [05:16<04:10, 45.81it/s]

Ep 13500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 54%|█████▍    | 13606/25000 [05:18<04:09, 45.67it/s]

Ep 13600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 55%|█████▍    | 13704/25000 [05:21<04:49, 39.05it/s]

Ep 13700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 55%|█████▌    | 13806/25000 [05:24<05:22, 34.70it/s]

Ep 13800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 56%|█████▌    | 13907/25000 [05:26<04:00, 46.15it/s]

Ep 13900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 56%|█████▌    | 14007/25000 [05:28<04:06, 44.61it/s]

Ep 14000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 1


 56%|█████▋    | 14107/25000 [05:30<03:58, 45.76it/s]

Ep 14100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 1


 57%|█████▋    | 14207/25000 [05:33<03:57, 45.38it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 57%|█████▋    | 14307/25000 [05:35<04:35, 38.84it/s]

Ep 14300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 58%|█████▊    | 14406/25000 [05:38<05:22, 32.86it/s]

Ep 14400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 1


 58%|█████▊    | 14507/25000 [05:40<03:45, 46.46it/s]

Ep 14500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 58%|█████▊    | 14607/25000 [05:42<03:45, 46.01it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 1


 59%|█████▉    | 14707/25000 [05:45<03:42, 46.28it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 59%|█████▉    | 14807/25000 [05:47<03:42, 45.91it/s]

Ep 14800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 60%|█████▉    | 14902/25000 [05:49<03:45, 44.82it/s]

Ep 14900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 60%|██████    | 15005/25000 [05:52<04:37, 36.07it/s]

Ep 15000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 60%|██████    | 15106/25000 [05:54<03:39, 45.02it/s]

Ep 15100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 61%|██████    | 15206/25000 [05:56<03:36, 45.20it/s]

Ep 15200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 1


 61%|██████    | 15306/25000 [05:59<03:28, 46.53it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 62%|██████▏   | 15406/25000 [06:01<03:25, 46.73it/s]

Ep 15400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 62%|██████▏   | 15506/25000 [06:03<03:29, 45.28it/s]

Ep 15500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 62%|██████▏   | 15604/25000 [06:06<04:08, 37.80it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 63%|██████▎   | 15708/25000 [06:08<03:23, 45.58it/s]

Ep 15700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 63%|██████▎   | 15808/25000 [06:11<03:19, 46.13it/s]

Ep 15800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 64%|██████▎   | 15908/25000 [06:13<03:19, 45.60it/s]

Ep 15900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 64%|██████▍   | 16008/25000 [06:15<03:22, 44.46it/s]

Ep 16000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 1


 64%|██████▍   | 16108/25000 [06:17<03:19, 44.60it/s]

Ep 16100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 1


 65%|██████▍   | 16205/25000 [06:20<03:58, 36.92it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 1


 65%|██████▌   | 16307/25000 [06:23<03:19, 43.49it/s]

Ep 16300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 66%|██████▌   | 16407/25000 [06:25<03:06, 46.18it/s]

Ep 16400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 66%|██████▌   | 16507/25000 [06:27<03:06, 45.64it/s]

Ep 16500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 66%|██████▋   | 16607/25000 [06:29<03:04, 45.38it/s]

Ep 16600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 67%|██████▋   | 16707/25000 [06:31<02:58, 46.37it/s]

Ep 16700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 67%|██████▋   | 16804/25000 [06:34<03:36, 37.91it/s]

Ep 16800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 68%|██████▊   | 16909/25000 [06:37<03:10, 42.51it/s]

Ep 16900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 68%|██████▊   | 17009/25000 [06:39<02:55, 45.65it/s]

Ep 17000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 68%|██████▊   | 17104/25000 [06:41<02:55, 45.09it/s]

Ep 17100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 69%|██████▉   | 17209/25000 [06:43<02:50, 45.82it/s]

Ep 17200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 69%|██████▉   | 17304/25000 [06:45<02:47, 46.07it/s]

Ep 17300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 1


 70%|██████▉   | 17403/25000 [06:48<03:23, 37.27it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 70%|███████   | 17507/25000 [06:51<03:11, 39.17it/s]

Ep 17500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 70%|███████   | 17606/25000 [06:53<02:45, 44.78it/s]

Ep 17600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 71%|███████   | 17706/25000 [06:55<02:36, 46.50it/s]

Ep 17700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 71%|███████   | 17806/25000 [06:57<02:39, 45.20it/s]

Ep 17800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 72%|███████▏  | 17906/25000 [07:00<02:34, 46.02it/s]

Ep 17900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 72%|███████▏  | 18007/25000 [07:02<03:05, 37.67it/s]

Ep 18000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 72%|███████▏  | 18104/25000 [07:05<03:20, 34.40it/s]

Ep 18100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 73%|███████▎  | 18208/25000 [07:07<02:29, 45.53it/s]

Ep 18200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 73%|███████▎  | 18308/25000 [07:09<02:21, 47.27it/s]

Ep 18300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 74%|███████▎  | 18408/25000 [07:12<02:21, 46.59it/s]

Ep 18400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 74%|███████▍  | 18508/25000 [07:14<02:22, 45.62it/s]

Ep 18500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 74%|███████▍  | 18606/25000 [07:16<02:59, 35.58it/s]

Ep 18600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 75%|███████▍  | 18706/25000 [07:19<03:00, 34.88it/s]

Ep 18700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 75%|███████▌  | 18806/25000 [07:21<02:15, 45.67it/s]

Ep 18800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 76%|███████▌  | 18906/25000 [07:24<02:14, 45.24it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 76%|███████▌  | 19006/25000 [07:26<02:09, 46.25it/s]

Ep 19000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 76%|███████▋  | 19106/25000 [07:28<02:11, 44.84it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 77%|███████▋  | 19204/25000 [07:30<02:37, 36.72it/s]

Ep 19200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 77%|███████▋  | 19303/25000 [07:33<02:41, 35.23it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 78%|███████▊  | 19409/25000 [07:36<02:02, 45.51it/s]

Ep 19400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 78%|███████▊  | 19509/25000 [07:38<01:58, 46.37it/s]

Ep 19500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 1


 78%|███████▊  | 19609/25000 [07:40<01:56, 46.30it/s]

Ep 19600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 79%|███████▉  | 19704/25000 [07:42<01:54, 46.36it/s]

Ep 19700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 79%|███████▉  | 19804/25000 [07:44<02:12, 39.25it/s]

Ep 19800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 80%|███████▉  | 19906/25000 [07:47<02:32, 33.34it/s]

Ep 19900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 80%|████████  | 20006/25000 [07:50<01:49, 45.79it/s]

Ep 20000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 80%|████████  | 20106/25000 [07:52<01:47, 45.71it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 81%|████████  | 20206/25000 [07:54<01:45, 45.58it/s]

Ep 20200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 1


 81%|████████  | 20306/25000 [07:56<01:42, 45.95it/s]

Ep 20300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 82%|████████▏ | 20406/25000 [07:58<01:48, 42.49it/s]

Ep 20400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 82%|████████▏ | 20506/25000 [08:01<02:03, 36.32it/s]

Ep 20500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 82%|████████▏ | 20605/25000 [08:04<01:34, 46.47it/s]

Ep 20600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 83%|████████▎ | 20705/25000 [08:06<01:33, 45.97it/s]

Ep 20700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 83%|████████▎ | 20805/25000 [08:08<01:34, 44.18it/s]

Ep 20800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 84%|████████▎ | 20905/25000 [08:10<01:30, 45.42it/s]

Ep 20900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 84%|████████▍ | 21005/25000 [08:12<01:26, 46.29it/s]

Ep 21000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 1


 84%|████████▍ | 21104/25000 [08:15<01:48, 35.80it/s]

Ep 21100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 85%|████████▍ | 21206/25000 [08:18<01:23, 45.24it/s]

Ep 21200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 85%|████████▌ | 21306/25000 [08:20<01:21, 45.31it/s]

Ep 21300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 86%|████████▌ | 21406/25000 [08:22<01:18, 45.69it/s]

Ep 21400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 86%|████████▌ | 21506/25000 [08:25<01:15, 46.40it/s]

Ep 21500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 86%|████████▋ | 21606/25000 [08:27<01:14, 45.30it/s]

Ep 21600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 87%|████████▋ | 21707/25000 [08:29<01:29, 36.64it/s]

Ep 21700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 87%|████████▋ | 21808/25000 [08:32<01:12, 44.06it/s]

Ep 21800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 88%|████████▊ | 21908/25000 [08:34<01:07, 46.08it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.6, First Action 1


 88%|████████▊ | 22008/25000 [08:37<01:04, 46.03it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 88%|████████▊ | 22108/25000 [08:39<01:01, 46.69it/s]

Ep 22100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 89%|████████▉ | 22208/25000 [08:41<01:00, 46.12it/s]

Ep 22200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 1


 89%|████████▉ | 22306/25000 [08:44<01:12, 37.26it/s]

Ep 22300/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 1


 90%|████████▉ | 22405/25000 [08:46<00:58, 44.28it/s]

Ep 22400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 90%|█████████ | 22505/25000 [08:49<00:55, 44.62it/s]

Ep 22500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 90%|█████████ | 22605/25000 [08:51<00:52, 45.83it/s]

Ep 22600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 91%|█████████ | 22705/25000 [08:53<00:49, 46.11it/s]

Ep 22700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 91%|█████████ | 22805/25000 [08:55<00:48, 45.55it/s]

Ep 22800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 92%|█████████▏| 22904/25000 [08:58<00:56, 37.00it/s]

Ep 22900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 92%|█████████▏| 23009/25000 [09:01<00:44, 44.44it/s]

Ep 23000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 92%|█████████▏| 23109/25000 [09:03<00:41, 45.94it/s]

Ep 23100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 93%|█████████▎| 23209/25000 [09:05<00:38, 46.76it/s]

Ep 23200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 93%|█████████▎| 23309/25000 [09:07<00:36, 46.20it/s]

Ep 23300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 94%|█████████▎| 23409/25000 [09:09<00:35, 44.67it/s]

Ep 23400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 94%|█████████▍| 23505/25000 [09:12<00:39, 38.09it/s]

Ep 23500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 1


 94%|█████████▍| 23609/25000 [09:15<00:32, 42.73it/s]

Ep 23600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 95%|█████████▍| 23709/25000 [09:17<00:28, 45.86it/s]

Ep 23700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 95%|█████████▌| 23809/25000 [09:19<00:26, 45.63it/s]

Ep 23800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 96%|█████████▌| 23909/25000 [09:21<00:23, 46.11it/s]

Ep 23900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 96%|█████████▌| 24009/25000 [09:24<00:21, 45.85it/s]

Ep 24000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 96%|█████████▋| 24103/25000 [09:26<00:23, 37.67it/s]

Ep 24100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 97%|█████████▋| 24206/25000 [09:29<00:19, 41.02it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 1


 97%|█████████▋| 24306/25000 [09:31<00:15, 46.20it/s]

Ep 24300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 98%|█████████▊| 24406/25000 [09:33<00:13, 44.69it/s]

Ep 24400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 98%|█████████▊| 24506/25000 [09:35<00:10, 45.77it/s]

Ep 24500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 98%|█████████▊| 24606/25000 [09:38<00:08, 45.87it/s]

Ep 24600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 1


 99%|█████████▉| 24705/25000 [09:40<00:08, 34.80it/s]

Ep 24700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 99%|█████████▉| 24806/25000 [09:43<00:05, 37.94it/s]

Ep 24800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


100%|█████████▉| 24905/25000 [09:45<00:02, 45.43it/s]

Ep 24900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


100%|██████████| 25000/25000 [09:47<00:00, 42.53it/s]


Ep 25000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1

TEST:


 40%|███▉      | 119/300 [00:00<00:01, 144.59it/s]

Ep 100/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 75%|███████▍  | 224/300 [00:01<00:00, 141.10it/s]

Ep 200/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


100%|██████████| 300/300 [00:02<00:00, 143.13it/s]


Ep 300/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 1

GAMMA 0.8 - LR 0.001 - Entropy Decay False


  0%|          | 109/25000 [00:02<09:08, 45.34it/s]

Ep 100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


  1%|          | 206/25000 [00:04<11:09, 37.04it/s]

Ep 200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


  1%|          | 308/25000 [00:07<09:57, 41.36it/s]

Ep 300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 3


  2%|▏         | 408/25000 [00:09<08:47, 46.60it/s]

Ep 400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.8, First Action 6


  2%|▏         | 508/25000 [00:12<08:43, 46.82it/s]

Ep 500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


  2%|▏         | 608/25000 [00:14<08:51, 45.87it/s]

Ep 600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


  3%|▎         | 708/25000 [00:16<08:51, 45.71it/s]

Ep 700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


  3%|▎         | 808/25000 [00:19<10:29, 38.41it/s]

Ep 800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


  4%|▎         | 906/25000 [00:21<10:13, 39.26it/s]

Ep 900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  4%|▍         | 1006/25000 [00:24<08:36, 46.47it/s]

Ep 1000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


  4%|▍         | 1106/25000 [00:26<08:44, 45.57it/s]

Ep 1100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


  5%|▍         | 1206/25000 [00:28<08:56, 44.32it/s]

Ep 1200/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 2


  5%|▌         | 1306/25000 [00:30<08:32, 46.21it/s]

Ep 1300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  6%|▌         | 1406/25000 [00:33<10:32, 37.32it/s]

Ep 1400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


  6%|▌         | 1505/25000 [00:35<11:38, 33.66it/s]

Ep 1500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


  6%|▋         | 1609/25000 [00:38<08:34, 45.44it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 3


  7%|▋         | 1709/25000 [00:40<08:31, 45.57it/s]

Ep 1700/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


  7%|▋         | 1809/25000 [00:42<08:22, 46.18it/s]

Ep 1800/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 1


  8%|▊         | 1909/25000 [00:44<08:34, 44.87it/s]

Ep 1900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


  8%|▊         | 2006/25000 [00:47<10:39, 35.94it/s]

Ep 2000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


  8%|▊         | 2107/25000 [00:50<10:54, 34.96it/s]

Ep 2100/25000, Opt. Action: 4, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.6, First Action 1


  9%|▉         | 2209/25000 [00:52<08:24, 45.14it/s]

Ep 2200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


  9%|▉         | 2309/25000 [00:54<08:12, 46.07it/s]

Ep 2300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 10%|▉         | 2409/25000 [00:56<08:15, 45.55it/s]

Ep 2400/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 6


 10%|█         | 2509/25000 [00:59<08:06, 46.24it/s]

Ep 2500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 10%|█         | 2605/25000 [01:01<09:54, 37.70it/s]

Ep 2600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 11%|█         | 2705/25000 [01:04<11:05, 33.52it/s]

Ep 2700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 4


 11%|█         | 2809/25000 [01:06<07:54, 46.81it/s]

Ep 2800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 12%|█▏        | 2909/25000 [01:08<08:12, 44.90it/s]

Ep 2900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 12%|█▏        | 3009/25000 [01:11<07:56, 46.15it/s]

Ep 3000/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 1


 12%|█▏        | 3109/25000 [01:13<08:15, 44.16it/s]

Ep 3100/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 0


 13%|█▎        | 3207/25000 [01:15<09:45, 37.19it/s]

Ep 3200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 13%|█▎        | 3303/25000 [01:18<10:41, 33.80it/s]

Ep 3300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 14%|█▎        | 3405/25000 [01:20<07:50, 45.88it/s]

Ep 3400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 14%|█▍        | 3505/25000 [01:22<07:40, 46.71it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 14%|█▍        | 3605/25000 [01:25<07:39, 46.61it/s]

Ep 3600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 3


 15%|█▍        | 3705/25000 [01:27<07:52, 45.07it/s]

Ep 3700/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 1


 15%|█▌        | 3806/25000 [01:29<10:07, 34.88it/s]

Ep 3800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 16%|█▌        | 3903/25000 [01:32<10:13, 34.38it/s]

Ep 3900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 3


 16%|█▌        | 4005/25000 [01:34<07:36, 45.96it/s]

Ep 4000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 16%|█▋        | 4105/25000 [01:37<07:44, 45.00it/s]

Ep 4100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 17%|█▋        | 4205/25000 [01:39<07:43, 44.83it/s]

Ep 4200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 4


 17%|█▋        | 4305/25000 [01:41<07:46, 44.39it/s]

Ep 4300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 18%|█▊        | 4404/25000 [01:43<08:52, 38.71it/s]

Ep 4400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.1, First Action 3


 18%|█▊        | 4506/25000 [01:46<09:37, 35.46it/s]

Ep 4500/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.0, First Action 1


 18%|█▊        | 4607/25000 [01:49<07:18, 46.50it/s]

Ep 4600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 1


 19%|█▉        | 4707/25000 [01:51<07:19, 46.14it/s]

Ep 4700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 9


 19%|█▉        | 4807/25000 [01:53<07:26, 45.20it/s]

Ep 4800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 20%|█▉        | 4907/25000 [01:55<07:29, 44.72it/s]

Ep 4900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 6


 20%|██        | 5007/25000 [01:57<08:25, 39.55it/s]

Ep 5000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.5, First Action 4


 20%|██        | 5106/25000 [02:00<09:26, 35.14it/s]

Ep 5100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 1


 21%|██        | 5205/25000 [02:03<07:17, 45.29it/s]

Ep 5200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.1, First Action 1


 21%|██        | 5305/25000 [02:05<07:14, 45.30it/s]

Ep 5300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 22%|██▏       | 5405/25000 [02:07<07:08, 45.68it/s]

Ep 5400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 22%|██▏       | 5505/25000 [02:09<07:03, 46.06it/s]

Ep 5500/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.2, First Action 1


 22%|██▏       | 5605/25000 [02:11<06:58, 46.33it/s]

Ep 5600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 1


 23%|██▎       | 5704/25000 [02:14<09:04, 35.42it/s]

Ep 5700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 23%|██▎       | 5806/25000 [02:17<07:06, 45.04it/s]

Ep 5800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 24%|██▎       | 5906/25000 [02:19<07:03, 45.13it/s]

Ep 5900/25000, Opt. Action: 6, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.3, First Action 1


 24%|██▍       | 6006/25000 [02:21<06:57, 45.53it/s]

Ep 6000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 24%|██▍       | 6106/25000 [02:24<06:49, 46.15it/s]

Ep 6100/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.0, First Action 1


 25%|██▍       | 6206/25000 [02:26<06:46, 46.19it/s]

Ep 6200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 1


 25%|██▌       | 6307/25000 [02:28<08:32, 36.49it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 26%|██▌       | 6406/25000 [02:31<06:49, 45.44it/s]

Ep 6400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 26%|██▌       | 6506/25000 [02:33<06:49, 45.12it/s]

Ep 6500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.1, First Action 1


 26%|██▋       | 6606/25000 [02:36<06:39, 46.09it/s]

Ep 6600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 27%|██▋       | 6706/25000 [02:38<06:49, 44.70it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.9, First Action 1


 27%|██▋       | 6806/25000 [02:40<06:43, 45.07it/s]

Ep 6800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 28%|██▊       | 6907/25000 [02:43<08:16, 36.47it/s]

Ep 6900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 28%|██▊       | 7005/25000 [02:45<06:48, 44.03it/s]

Ep 7000/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.8, First Action 1


 28%|██▊       | 7105/25000 [02:48<06:36, 45.09it/s]

Ep 7100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 29%|██▉       | 7205/25000 [02:50<06:32, 45.31it/s]

Ep 7200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.4, First Action 1


 29%|██▉       | 7305/25000 [02:52<06:18, 46.79it/s]

Ep 7300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 30%|██▉       | 7405/25000 [02:54<06:20, 46.28it/s]

Ep 7400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 1


 30%|███       | 7505/25000 [02:57<08:02, 36.22it/s]

Ep 7500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 30%|███       | 7606/25000 [03:00<06:45, 42.94it/s]

Ep 7600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 31%|███       | 7706/25000 [03:02<06:33, 43.97it/s]

Ep 7700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.6, First Action 1


 31%|███       | 7806/25000 [03:04<06:17, 45.55it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 1


 32%|███▏      | 7906/25000 [03:06<06:09, 46.20it/s]

Ep 7900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.4, First Action 1


 32%|███▏      | 8006/25000 [03:08<06:08, 46.10it/s]

Ep 8000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 32%|███▏      | 8107/25000 [03:11<07:36, 37.04it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 1


 33%|███▎      | 8208/25000 [03:14<06:50, 40.90it/s]

Ep 8200/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.1, First Action 1


 33%|███▎      | 8308/25000 [03:16<06:10, 45.02it/s]

Ep 8300/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 1


 34%|███▎      | 8408/25000 [03:18<06:08, 44.97it/s]

Ep 8400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 34%|███▍      | 8508/25000 [03:20<06:05, 45.10it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 34%|███▍      | 8608/25000 [03:23<05:56, 45.93it/s]

Ep 8600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 1


 35%|███▍      | 8706/25000 [03:25<07:22, 36.80it/s]

Ep 8700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 1


 35%|███▌      | 8808/25000 [03:28<07:10, 37.64it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 1


 36%|███▌      | 8908/25000 [03:30<05:49, 46.03it/s]

Ep 8900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 1


 36%|███▌      | 9008/25000 [03:32<05:46, 46.21it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 1


 36%|███▋      | 9108/25000 [03:35<05:47, 45.69it/s]

Ep 9100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 37%|███▋      | 9208/25000 [03:37<05:59, 43.93it/s]

Ep 9200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 37%|███▋      | 9305/25000 [03:39<06:50, 38.25it/s]

Ep 9300/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.9, First Action 1


 38%|███▊      | 9403/25000 [03:42<07:31, 34.56it/s]

Ep 9400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 38%|███▊      | 9507/25000 [03:44<05:37, 45.92it/s]

Ep 9500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 38%|███▊      | 9607/25000 [03:47<05:38, 45.53it/s]

Ep 9600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.4, First Action 1


 39%|███▉      | 9707/25000 [03:49<05:41, 44.80it/s]

Ep 9700/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.2, First Action 1


 39%|███▉      | 9807/25000 [03:51<05:30, 45.98it/s]

Ep 9800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 1


 40%|███▉      | 9904/25000 [03:53<06:45, 37.26it/s]

Ep 9900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 40%|████      | 10004/25000 [03:56<07:08, 34.98it/s]

Ep 10000/25000, Opt. Action: 2, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.9, First Action 1


 40%|████      | 10105/25000 [03:59<05:25, 45.75it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 41%|████      | 10205/25000 [04:01<05:31, 44.61it/s]

Ep 10200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 41%|████      | 10305/25000 [04:03<05:28, 44.70it/s]

Ep 10300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 1


 42%|████▏     | 10405/25000 [04:05<05:17, 45.99it/s]

Ep 10400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 42%|████▏     | 10505/25000 [04:08<06:28, 37.29it/s]

Ep 10500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 1


 42%|████▏     | 10603/25000 [04:10<07:06, 33.74it/s]

Ep 10600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 43%|████▎     | 10706/25000 [04:13<05:13, 45.63it/s]

Ep 10700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 43%|████▎     | 10806/25000 [04:15<05:06, 46.35it/s]

Ep 10800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 1


 44%|████▎     | 10906/25000 [04:17<05:01, 46.80it/s]

Ep 10900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 1


 44%|████▍     | 11006/25000 [04:20<05:06, 45.68it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 1


 44%|████▍     | 11105/25000 [04:22<06:14, 37.06it/s]

Ep 11100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 1


 45%|████▍     | 11204/25000 [04:24<06:21, 36.16it/s]

Ep 11200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.2, First Action 1


 45%|████▌     | 11306/25000 [04:27<05:03, 45.18it/s]

Ep 11300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.8, First Action 1


 46%|████▌     | 11406/25000 [04:29<04:55, 46.05it/s]

Ep 11400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 46%|████▌     | 11506/25000 [04:32<05:07, 43.93it/s]

Ep 11500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 1


 46%|████▋     | 11606/25000 [04:34<04:55, 45.29it/s]

Ep 11600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 1


 47%|████▋     | 11706/25000 [04:36<04:48, 46.04it/s]

Ep 11700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 1


 47%|████▋     | 11806/25000 [04:39<05:51, 37.58it/s]

Ep 11800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 1


 48%|████▊     | 11908/25000 [04:41<04:49, 45.21it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 1


 48%|████▊     | 12008/25000 [04:44<04:44, 45.63it/s]

Ep 12000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 1


 48%|████▊     | 12108/25000 [04:46<04:45, 45.12it/s]

Ep 12100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 1


 49%|████▉     | 12208/25000 [04:48<04:41, 45.41it/s]

Ep 12200/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.4, First Action 1


 49%|████▉     | 12308/25000 [04:50<04:37, 45.71it/s]

Ep 12300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 50%|████▉     | 12404/25000 [04:53<05:56, 35.36it/s]

Ep 12400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 1


 50%|█████     | 12509/25000 [04:56<04:36, 45.26it/s]

Ep 12500/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 1


 50%|█████     | 12609/25000 [04:58<04:36, 44.89it/s]

Ep 12600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.1, First Action 1


 51%|█████     | 12709/25000 [05:00<04:36, 44.44it/s]

Ep 12700/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.6, First Action 1


 51%|█████     | 12809/25000 [05:02<04:31, 44.85it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.6, First Action 1


 52%|█████▏    | 12909/25000 [05:04<04:23, 45.80it/s]

Ep 12900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.2, First Action 1


 52%|█████▏    | 13005/25000 [05:07<05:24, 37.00it/s]

Ep 13000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 1


 52%|█████▏    | 13109/25000 [05:10<04:29, 44.16it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 1


 53%|█████▎    | 13209/25000 [05:12<04:16, 46.00it/s]

Ep 13200/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.8, First Action 1


 53%|█████▎    | 13309/25000 [05:14<04:13, 46.08it/s]

Ep 13300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 1


 54%|█████▎    | 13409/25000 [05:17<04:10, 46.26it/s]

Ep 13400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.2, First Action 1


 54%|█████▍    | 13509/25000 [05:19<04:09, 46.02it/s]

Ep 13500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 1


 54%|█████▍    | 13605/25000 [05:21<04:56, 38.44it/s]

Ep 13600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 1


 55%|█████▍    | 13707/25000 [05:24<04:20, 43.27it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.8, First Action 1


 55%|█████▌    | 13807/25000 [05:26<04:07, 45.19it/s]

Ep 13800/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 1


 56%|█████▌    | 13907/25000 [05:28<04:05, 45.25it/s]

Ep 13900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.3, First Action 1


 56%|█████▌    | 14007/25000 [05:31<04:00, 45.76it/s]

Ep 14000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 15.2, First Action 1


 56%|█████▋    | 14107/25000 [05:33<04:07, 44.05it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.8, First Action 1


 57%|█████▋    | 14206/25000 [05:36<04:55, 36.49it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.6, First Action 1


 57%|█████▋    | 14306/25000 [05:38<04:21, 40.92it/s]

Ep 14300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 1


 58%|█████▊    | 14406/25000 [05:41<03:47, 46.60it/s]

Ep 14400/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.7, First Action 1


 58%|█████▊    | 14506/25000 [05:43<03:49, 45.66it/s]

Ep 14500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 1


 58%|█████▊    | 14606/25000 [05:45<03:52, 44.70it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.9, First Action 1


 59%|█████▉    | 14706/25000 [05:47<03:48, 45.03it/s]

Ep 14700/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 12.3, First Action 1


 59%|█████▉    | 14805/25000 [05:50<04:30, 37.63it/s]

Ep 14800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 1


 60%|█████▉    | 14908/25000 [05:53<04:14, 39.73it/s]

Ep 14900/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 1


 60%|██████    | 15008/25000 [05:55<03:41, 45.07it/s]

Ep 15000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 1


 60%|██████    | 15108/25000 [05:57<03:39, 45.11it/s]

Ep 15100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 61%|██████    | 15208/25000 [05:59<03:30, 46.50it/s]

Ep 15200/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.0, First Action 1


 61%|██████    | 15308/25000 [06:02<03:34, 45.12it/s]

Ep 15300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 1


 62%|██████▏   | 15406/25000 [06:04<04:07, 38.76it/s]

Ep 15400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.2, First Action 1


 62%|██████▏   | 15506/25000 [06:07<04:45, 33.26it/s]

Ep 15500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 1


 62%|██████▏   | 15605/25000 [06:09<03:28, 45.06it/s]

Ep 15600/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 1


 63%|██████▎   | 15705/25000 [06:11<03:22, 46.00it/s]

Ep 15700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 63%|██████▎   | 15805/25000 [06:14<03:19, 46.16it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.5, First Action 1


 64%|██████▎   | 15905/25000 [06:16<03:22, 44.85it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.6, First Action 1


 64%|██████▍   | 16006/25000 [06:18<04:10, 35.95it/s]

Ep 16000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 1


 64%|██████▍   | 16104/25000 [06:21<04:22, 33.85it/s]

Ep 16100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 1


 65%|██████▍   | 16206/25000 [06:23<03:10, 46.06it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 1


 65%|██████▌   | 16306/25000 [06:26<03:09, 45.98it/s]

Ep 16300/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.2, First Action 1


 66%|██████▌   | 16406/25000 [06:28<03:13, 44.34it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.9, First Action 1


 66%|██████▌   | 16506/25000 [06:30<03:07, 45.26it/s]

Ep 16500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 66%|██████▋   | 16605/25000 [06:32<03:37, 38.66it/s]

Ep 16600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 1


 67%|██████▋   | 16705/25000 [06:35<04:02, 34.16it/s]

Ep 16700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 1


 67%|██████▋   | 16806/25000 [06:38<02:58, 45.80it/s]

Ep 16800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 1


 68%|██████▊   | 16906/25000 [06:40<03:00, 44.89it/s]

Ep 16900/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 12.2, First Action 1


 68%|██████▊   | 17006/25000 [06:42<02:54, 45.90it/s]

Ep 17000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 1


 68%|██████▊   | 17106/25000 [06:44<02:55, 45.00it/s]

Ep 17100/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.4, First Action 1


 69%|██████▉   | 17205/25000 [06:47<03:23, 38.30it/s]

Ep 17200/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 1


 69%|██████▉   | 17303/25000 [06:49<03:50, 33.35it/s]

Ep 17300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 1


 70%|██████▉   | 17409/25000 [06:52<02:49, 44.83it/s]

Ep 17400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.4, First Action 1


 70%|███████   | 17509/25000 [06:54<02:43, 45.76it/s]

Ep 17500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 1


 70%|███████   | 17609/25000 [06:56<02:39, 46.22it/s]

Ep 17600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 71%|███████   | 17709/25000 [06:59<02:37, 46.26it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 11.9, First Action 1


 71%|███████   | 17804/25000 [07:01<03:06, 38.61it/s]

Ep 17800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.8, First Action 1


 72%|███████▏  | 17906/25000 [07:04<03:21, 35.26it/s]

Ep 17900/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.9, First Action 1


 72%|███████▏  | 18007/25000 [07:06<02:37, 44.41it/s]

Ep 18000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 72%|███████▏  | 18107/25000 [07:08<02:32, 45.33it/s]

Ep 18100/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.4, First Action 1


 73%|███████▎  | 18207/25000 [07:11<02:27, 46.14it/s]

Ep 18200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.4, First Action 1


 73%|███████▎  | 18307/25000 [07:13<02:28, 45.03it/s]

Ep 18300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 74%|███████▎  | 18407/25000 [07:15<02:25, 45.24it/s]

Ep 18400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 74%|███████▍  | 18506/25000 [07:18<02:51, 37.94it/s]

Ep 18500/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.7, First Action 1


 74%|███████▍  | 18608/25000 [07:20<02:22, 44.99it/s]

Ep 18600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 1


 75%|███████▍  | 18708/25000 [07:23<02:15, 46.37it/s]

Ep 18700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 15.0, First Action 1


 75%|███████▌  | 18808/25000 [07:25<02:21, 43.89it/s]

Ep 18800/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.5, First Action 1


 76%|███████▌  | 18908/25000 [07:27<02:15, 45.09it/s]

Ep 18900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 1


 76%|███████▌  | 19008/25000 [07:29<02:11, 45.55it/s]

Ep 19000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.8, First Action 1


 76%|███████▋  | 19107/25000 [07:32<02:44, 35.83it/s]

Ep 19100/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 13.8, First Action 1


 77%|███████▋  | 19209/25000 [07:35<02:13, 43.26it/s]

Ep 19200/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.0, First Action 1


 77%|███████▋  | 19309/25000 [07:37<02:04, 45.55it/s]

Ep 19300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 78%|███████▊  | 19409/25000 [07:39<02:02, 45.77it/s]

Ep 19400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.2, First Action 1


 78%|███████▊  | 19509/25000 [07:41<01:58, 46.22it/s]

Ep 19500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 1


 78%|███████▊  | 19609/25000 [07:44<01:59, 44.99it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 79%|███████▉  | 19704/25000 [07:46<02:28, 35.76it/s]

Ep 19700/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.5, First Action 1


 79%|███████▉  | 19807/25000 [07:49<02:06, 40.89it/s]

Ep 19800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.6, First Action 1


 80%|███████▉  | 19907/25000 [07:51<01:52, 45.42it/s]

Ep 19900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.9, First Action 1


 80%|████████  | 20007/25000 [07:53<01:48, 46.12it/s]

Ep 20000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 80%|████████  | 20107/25000 [07:56<01:46, 46.02it/s]

Ep 20100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 1


 81%|████████  | 20207/25000 [07:58<01:45, 45.60it/s]

Ep 20200/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.3, First Action 1


 81%|████████  | 20306/25000 [08:00<02:04, 37.71it/s]

Ep 20300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.4, First Action 1


 82%|████████▏ | 20407/25000 [08:03<02:04, 36.85it/s]

Ep 20400/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.6, First Action 1


 82%|████████▏ | 20506/25000 [08:05<01:39, 45.20it/s]

Ep 20500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 1


 82%|████████▏ | 20606/25000 [08:08<01:37, 44.88it/s]

Ep 20600/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 15.4, First Action 1


 83%|████████▎ | 20706/25000 [08:10<01:35, 45.09it/s]

Ep 20700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.6, First Action 1


 83%|████████▎ | 20806/25000 [08:12<01:31, 45.99it/s]

Ep 20800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.7, First Action 1


 84%|████████▎ | 20906/25000 [08:15<01:47, 37.99it/s]

Ep 20900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.0, First Action 1


 84%|████████▍ | 21007/25000 [08:18<01:51, 35.89it/s]

Ep 21000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.3, First Action 1


 84%|████████▍ | 21107/25000 [08:20<01:26, 45.11it/s]

Ep 21100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 1


 85%|████████▍ | 21207/25000 [08:22<01:24, 44.83it/s]

Ep 21200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 1


 85%|████████▌ | 21307/25000 [08:24<01:21, 45.42it/s]

Ep 21300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 1


 86%|████████▌ | 21407/25000 [08:26<01:18, 45.58it/s]

Ep 21400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.7, First Action 1


 86%|████████▌ | 21507/25000 [08:29<01:35, 36.59it/s]

Ep 21500/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 14.4, First Action 1


 86%|████████▋ | 21604/25000 [08:32<01:41, 33.43it/s]

Ep 21600/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 1


 87%|████████▋ | 21707/25000 [08:34<01:12, 45.28it/s]

Ep 21700/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.6, First Action 1


 87%|████████▋ | 21807/25000 [08:36<01:10, 45.38it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.4, First Action 1


 88%|████████▊ | 21907/25000 [08:39<01:06, 46.58it/s]

Ep 21900/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.6, First Action 1


 88%|████████▊ | 22007/25000 [08:41<01:06, 45.09it/s]

Ep 22000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 1


 88%|████████▊ | 22106/25000 [08:43<01:17, 37.57it/s]

Ep 22100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.4, First Action 1


 89%|████████▉ | 22205/25000 [08:46<01:24, 32.90it/s]

Ep 22200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.7, First Action 1


 89%|████████▉ | 22306/25000 [08:48<01:01, 44.07it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.7, First Action 1


 90%|████████▉ | 22406/25000 [08:51<00:56, 46.29it/s]

Ep 22400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1


 90%|█████████ | 22506/25000 [08:53<00:55, 44.97it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.4, First Action 1


 90%|█████████ | 22606/25000 [08:55<00:52, 45.70it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 12.9, First Action 1


 91%|█████████ | 22706/25000 [08:57<01:02, 36.55it/s]

Ep 22700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 1


 91%|█████████ | 22803/25000 [09:00<01:05, 33.33it/s]

Ep 22800/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 14.1, First Action 1


 92%|█████████▏| 22906/25000 [09:03<00:46, 45.31it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 14.6, First Action 1


 92%|█████████▏| 23006/25000 [09:05<00:46, 43.08it/s]

Ep 23000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.4, First Action 1


 92%|█████████▏| 23106/25000 [09:07<00:41, 45.61it/s]

Ep 23100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 1


 93%|█████████▎| 23206/25000 [09:09<00:39, 45.81it/s]

Ep 23200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.7, First Action 1


 93%|█████████▎| 23304/25000 [09:12<00:45, 36.89it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 1


 94%|█████████▎| 23405/25000 [09:14<00:46, 34.63it/s]

Ep 23400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.9, First Action 1


 94%|█████████▍| 23507/25000 [09:17<00:32, 45.33it/s]

Ep 23500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.1, First Action 1


 94%|█████████▍| 23607/25000 [09:19<00:31, 44.91it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.0, First Action 1


 95%|█████████▍| 23707/25000 [09:21<00:28, 44.92it/s]

Ep 23700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.7, First Action 1


 95%|█████████▌| 23807/25000 [09:24<00:26, 45.16it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 13.5, First Action 1


 96%|█████████▌| 23906/25000 [09:26<00:30, 35.61it/s]

Ep 23900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 13.8, First Action 1


 96%|█████████▌| 24003/25000 [09:29<00:30, 32.25it/s]

Ep 24000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.3, First Action 1


 96%|█████████▋| 24108/25000 [09:31<00:19, 45.31it/s]

Ep 24100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.5, First Action 1


 97%|█████████▋| 24208/25000 [09:34<00:17, 45.30it/s]

Ep 24200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.8, First Action 1


 97%|█████████▋| 24308/25000 [09:36<00:15, 45.63it/s]

Ep 24300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.7, First Action 1


 98%|█████████▊| 24408/25000 [09:38<00:13, 44.81it/s]

Ep 24400/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 12.3, First Action 1


 98%|█████████▊| 24503/25000 [09:40<00:12, 38.59it/s]

Ep 24500/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 13.9, First Action 1


 98%|█████████▊| 24604/25000 [09:43<00:11, 35.28it/s]

Ep 24600/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 13.8, First Action 1


 99%|█████████▉| 24708/25000 [09:46<00:06, 44.79it/s]

Ep 24700/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 14.5, First Action 1


 99%|█████████▉| 24808/25000 [09:48<00:04, 45.55it/s]

Ep 24800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.3, First Action 1


100%|█████████▉| 24908/25000 [09:50<00:01, 46.04it/s]

Ep 24900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.2, First Action 1


100%|██████████| 25000/25000 [09:52<00:00, 42.19it/s]


Ep 25000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.4, First Action 1

TEST:


 39%|███▊      | 116/300 [00:00<00:01, 142.44it/s]

Ep 100/300, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 14.8, First Action 1


 74%|███████▎  | 221/300 [00:01<00:00, 138.35it/s]

Ep 200/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 13.0, First Action 1


100%|██████████| 300/300 [00:02<00:00, 136.39it/s]


Ep 300/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.1, First Action 1

GAMMA 0.9 - LR 0.01 - Entropy Decay False


  0%|          | 104/25000 [00:02<11:24, 36.37it/s]

Ep 100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 3


  1%|          | 205/25000 [00:05<09:16, 44.57it/s]

Ep 200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


  1%|          | 305/25000 [00:07<09:20, 44.03it/s]

Ep 300/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 5


  2%|▏         | 405/25000 [00:09<08:51, 46.26it/s]

Ep 400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  2%|▏         | 505/25000 [00:12<08:51, 46.04it/s]

Ep 500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 5


  2%|▏         | 605/25000 [00:14<08:59, 45.25it/s]

Ep 600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


  3%|▎         | 707/25000 [00:17<10:32, 38.43it/s]

Ep 700/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 5


  3%|▎         | 809/25000 [00:19<09:00, 44.78it/s]

Ep 800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  4%|▎         | 904/25000 [00:22<08:47, 45.71it/s]

Ep 900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


  4%|▍         | 1009/25000 [00:24<08:57, 44.65it/s]

Ep 1000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 5


  4%|▍         | 1109/25000 [00:26<08:52, 44.86it/s]

Ep 1100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  5%|▍         | 1209/25000 [00:28<08:40, 45.69it/s]

Ep 1200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  5%|▌         | 1306/25000 [00:31<10:27, 37.78it/s]

Ep 1300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


  6%|▌         | 1408/25000 [00:34<09:24, 41.82it/s]

Ep 1400/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.8, First Action 5


  6%|▌         | 1508/25000 [00:36<08:39, 45.22it/s]

Ep 1500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  6%|▋         | 1608/25000 [00:38<08:36, 45.25it/s]

Ep 1600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  7%|▋         | 1708/25000 [00:40<08:40, 44.74it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  7%|▋         | 1808/25000 [00:43<08:25, 45.89it/s]

Ep 1800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  8%|▊         | 1907/25000 [00:45<10:18, 37.36it/s]

Ep 1900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  8%|▊         | 2005/25000 [00:48<09:50, 38.92it/s]

Ep 2000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  8%|▊         | 2105/25000 [00:50<08:30, 44.81it/s]

Ep 2100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


  9%|▉         | 2205/25000 [00:52<08:15, 46.00it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


  9%|▉         | 2305/25000 [00:55<08:13, 45.98it/s]

Ep 2300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 10%|▉         | 2405/25000 [00:57<08:30, 44.25it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 10%|█         | 2504/25000 [00:59<09:39, 38.83it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 10%|█         | 2605/25000 [01:02<10:44, 34.77it/s]

Ep 2600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 11%|█         | 2707/25000 [01:05<08:17, 44.84it/s]

Ep 2700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 11%|█         | 2807/25000 [01:07<08:24, 44.02it/s]

Ep 2800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 12%|█▏        | 2907/25000 [01:09<08:09, 45.17it/s]

Ep 2900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 12%|█▏        | 3007/25000 [01:11<08:11, 44.77it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 12%|█▏        | 3107/25000 [01:14<09:51, 37.02it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 13%|█▎        | 3206/25000 [01:17<10:29, 34.62it/s]

Ep 3200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 13%|█▎        | 3308/25000 [01:19<08:02, 44.98it/s]

Ep 3300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 14%|█▎        | 3408/25000 [01:21<07:53, 45.60it/s]

Ep 3400/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 1


 14%|█▍        | 3508/25000 [01:23<07:50, 45.65it/s]

Ep 3500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 1


 14%|█▍        | 3608/25000 [01:26<07:40, 46.45it/s]

Ep 3600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 15%|█▍        | 3707/25000 [01:28<09:54, 35.85it/s]

Ep 3700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 15%|█▌        | 3806/25000 [01:31<10:29, 33.67it/s]

Ep 3800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 16%|█▌        | 3908/25000 [01:33<07:38, 46.00it/s]

Ep 3900/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.1, First Action 1


 16%|█▌        | 4008/25000 [01:35<07:41, 45.52it/s]

Ep 4000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 16%|█▋        | 4108/25000 [01:38<07:46, 44.83it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 17%|█▋        | 4208/25000 [01:40<07:44, 44.78it/s]

Ep 4200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 17%|█▋        | 4305/25000 [01:42<09:52, 34.93it/s]

Ep 4300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 18%|█▊        | 4406/25000 [01:45<10:26, 32.88it/s]

Ep 4400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 18%|█▊        | 4505/25000 [01:48<07:30, 45.49it/s]

Ep 4500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 18%|█▊        | 4605/25000 [01:50<07:31, 45.14it/s]

Ep 4600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 19%|█▉        | 4705/25000 [01:52<07:27, 45.34it/s]

Ep 4700/25000, Opt. Action: 1, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.9, First Action 1


 19%|█▉        | 4805/25000 [01:54<07:38, 44.07it/s]

Ep 4800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 20%|█▉        | 4907/25000 [01:57<08:53, 37.64it/s]

Ep 4900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 1


 20%|██        | 5004/25000 [01:59<09:36, 34.69it/s]

Ep 5000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 20%|██        | 5105/25000 [02:02<07:26, 44.51it/s]

Ep 5100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 21%|██        | 5205/25000 [02:04<07:18, 45.19it/s]

Ep 5200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 21%|██        | 5305/25000 [02:06<07:14, 45.30it/s]

Ep 5300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 22%|██▏       | 5405/25000 [02:08<07:03, 46.23it/s]

Ep 5400/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 2


 22%|██▏       | 5506/25000 [02:11<09:06, 35.65it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 22%|██▏       | 5604/25000 [02:14<09:27, 34.15it/s]

Ep 5600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 23%|██▎       | 5708/25000 [02:16<07:08, 44.98it/s]

Ep 5700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 23%|██▎       | 5808/25000 [02:19<06:55, 46.17it/s]

Ep 5800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 24%|██▎       | 5908/25000 [02:21<07:05, 44.92it/s]

Ep 5900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 24%|██▍       | 6008/25000 [02:23<06:58, 45.41it/s]

Ep 6000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 24%|██▍       | 6107/25000 [02:25<08:23, 37.54it/s]

Ep 6100/25000, Opt. Action: 1, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.6, First Action 2


 25%|██▍       | 6206/25000 [02:28<09:08, 34.23it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 25%|██▌       | 6305/25000 [02:31<07:07, 43.75it/s]

Ep 6300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 2


 26%|██▌       | 6405/25000 [02:33<06:54, 44.90it/s]

Ep 6400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 2


 26%|██▌       | 6505/25000 [02:35<06:52, 44.84it/s]

Ep 6500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 26%|██▋       | 6605/25000 [02:38<06:44, 45.42it/s]

Ep 6600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 27%|██▋       | 6705/25000 [02:40<07:56, 38.41it/s]

Ep 6700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 2


 27%|██▋       | 6807/25000 [02:43<07:59, 37.95it/s]

Ep 6800/25000, Opt. Action: 1, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.9, First Action 2


 28%|██▊       | 6908/25000 [02:45<06:54, 43.65it/s]

Ep 6900/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.9, First Action 2


 28%|██▊       | 7008/25000 [02:48<06:38, 45.20it/s]

Ep 7000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 2


 28%|██▊       | 7108/25000 [02:50<06:39, 44.82it/s]

Ep 7100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 2


 29%|██▉       | 7208/25000 [02:52<06:35, 44.97it/s]

Ep 7200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 29%|██▉       | 7308/25000 [02:54<06:26, 45.81it/s]

Ep 7300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 2


 30%|██▉       | 7406/25000 [02:57<08:01, 36.51it/s]

Ep 7400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 30%|███       | 7507/25000 [03:00<06:38, 43.88it/s]

Ep 7500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 2


 30%|███       | 7607/25000 [03:02<06:43, 43.07it/s]

Ep 7600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 31%|███       | 7707/25000 [03:04<06:27, 44.63it/s]

Ep 7700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 31%|███       | 7807/25000 [03:07<06:45, 42.38it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 32%|███▏      | 7902/25000 [03:09<06:55, 41.14it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 2


 32%|███▏      | 8005/25000 [03:12<07:38, 37.04it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.5, First Action 2


 32%|███▏      | 8109/25000 [03:15<06:23, 44.01it/s]

Ep 8100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 33%|███▎      | 8204/25000 [03:17<06:24, 43.69it/s]

Ep 8200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 2


 33%|███▎      | 8309/25000 [03:19<06:14, 44.53it/s]

Ep 8300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 2


 34%|███▎      | 8409/25000 [03:21<06:08, 44.99it/s]

Ep 8400/25000, Opt. Action: 3, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.4, First Action 2


 34%|███▍      | 8504/25000 [03:24<06:10, 44.56it/s]

Ep 8500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 34%|███▍      | 8606/25000 [03:26<07:21, 37.12it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 35%|███▍      | 8705/25000 [03:29<06:18, 43.03it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.8, First Action 2


 35%|███▌      | 8805/25000 [03:31<05:58, 45.21it/s]

Ep 8800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 36%|███▌      | 8905/25000 [03:34<05:54, 45.45it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 36%|███▌      | 9005/25000 [03:36<05:55, 44.95it/s]

Ep 9000/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.3, First Action 2


 36%|███▋      | 9105/25000 [03:38<05:54, 44.82it/s]

Ep 9100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 37%|███▋      | 9206/25000 [03:41<07:02, 37.41it/s]

Ep 9200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 37%|███▋      | 9307/25000 [03:44<06:04, 43.06it/s]

Ep 9300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 38%|███▊      | 9407/25000 [03:46<05:53, 44.11it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 38%|███▊      | 9507/25000 [03:48<05:37, 45.88it/s]

Ep 9500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 38%|███▊      | 9607/25000 [03:50<05:39, 45.29it/s]

Ep 9600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.9, First Action 2


 39%|███▉      | 9707/25000 [03:53<05:32, 46.06it/s]

Ep 9700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 39%|███▉      | 9806/25000 [03:55<06:57, 36.39it/s]

Ep 9800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 2


 40%|███▉      | 9907/25000 [03:58<06:29, 38.73it/s]

Ep 9900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 2


 40%|████      | 10006/25000 [04:00<05:40, 44.08it/s]

Ep 10000/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.3, First Action 2


 40%|████      | 10106/25000 [04:02<05:31, 44.98it/s]

Ep 10100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 41%|████      | 10206/25000 [04:05<05:25, 45.49it/s]

Ep 10200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 2


 41%|████      | 10306/25000 [04:07<05:29, 44.66it/s]

Ep 10300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 42%|████▏     | 10405/25000 [04:09<06:29, 37.48it/s]

Ep 10400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 2


 42%|████▏     | 10506/25000 [04:12<06:47, 35.58it/s]

Ep 10500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 42%|████▏     | 10608/25000 [04:15<05:15, 45.66it/s]

Ep 10600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 43%|████▎     | 10708/25000 [04:17<05:32, 42.93it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 2


 43%|████▎     | 10808/25000 [04:19<05:15, 44.99it/s]

Ep 10800/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.8, First Action 2


 44%|████▎     | 10908/25000 [04:21<05:09, 45.50it/s]

Ep 10900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 2


 44%|████▍     | 11005/25000 [04:24<06:14, 37.40it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 2


 44%|████▍     | 11105/25000 [04:27<06:36, 35.04it/s]

Ep 11100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 2


 45%|████▍     | 11207/25000 [04:29<05:07, 44.87it/s]

Ep 11200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 45%|████▌     | 11307/25000 [04:31<04:59, 45.75it/s]

Ep 11300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 2


 46%|████▌     | 11407/25000 [04:33<05:05, 44.44it/s]

Ep 11400/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 15.9, First Action 2


 46%|████▌     | 11507/25000 [04:36<04:59, 45.02it/s]

Ep 11500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 2


 46%|████▋     | 11605/25000 [04:38<05:59, 37.28it/s]

Ep 11600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 2


 47%|████▋     | 11706/25000 [04:41<06:47, 32.65it/s]

Ep 11700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 2


 47%|████▋     | 11806/25000 [04:43<05:04, 43.39it/s]

Ep 11800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 48%|████▊     | 11906/25000 [04:46<04:47, 45.59it/s]

Ep 11900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 48%|████▊     | 12006/25000 [04:48<04:49, 44.87it/s]

Ep 12000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 2


 48%|████▊     | 12106/25000 [04:50<04:42, 45.63it/s]

Ep 12100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 49%|████▉     | 12205/25000 [04:52<05:44, 37.13it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.2, First Action 2


 49%|████▉     | 12306/25000 [04:55<06:24, 33.03it/s]

Ep 12300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.9, First Action 2


 50%|████▉     | 12409/25000 [04:58<04:43, 44.35it/s]

Ep 12400/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.3, First Action 2


 50%|█████     | 12509/25000 [05:00<04:36, 45.20it/s]

Ep 12500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 2


 50%|█████     | 12609/25000 [05:03<04:33, 45.28it/s]

Ep 12600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 2


 51%|█████     | 12709/25000 [05:05<04:31, 45.30it/s]

Ep 12700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 2


 51%|█████     | 12804/25000 [05:07<04:39, 43.64it/s]

Ep 12800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 52%|█████▏    | 12907/25000 [05:10<05:38, 35.73it/s]

Ep 12900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.5, First Action 2


 52%|█████▏    | 13008/25000 [05:12<04:25, 45.19it/s]

Ep 13000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 2


 52%|█████▏    | 13108/25000 [05:15<04:24, 44.93it/s]

Ep 13100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.0, First Action 2


 53%|█████▎    | 13208/25000 [05:17<04:22, 44.92it/s]

Ep 13200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 2


 53%|█████▎    | 13308/25000 [05:19<04:19, 45.12it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.3, First Action 2


 54%|█████▎    | 13408/25000 [05:21<04:14, 45.54it/s]

Ep 13400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 54%|█████▍    | 13507/25000 [05:24<05:19, 35.96it/s]

Ep 13500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 2


 54%|█████▍    | 13608/25000 [05:27<04:21, 43.57it/s]

Ep 13600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 2


 55%|█████▍    | 13708/25000 [05:29<04:13, 44.49it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 55%|█████▌    | 13808/25000 [05:31<04:05, 45.55it/s]

Ep 13800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 56%|█████▌    | 13908/25000 [05:34<04:02, 45.73it/s]

Ep 13900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


 56%|█████▌    | 14008/25000 [05:36<04:06, 44.55it/s]

Ep 14000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 56%|█████▋    | 14105/25000 [05:38<04:52, 37.26it/s]

Ep 14100/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.1, First Action 9


 57%|█████▋    | 14208/25000 [05:41<04:06, 43.77it/s]

Ep 14200/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.9, First Action 9


 57%|█████▋    | 14308/25000 [05:44<03:59, 44.70it/s]

Ep 14300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 58%|█████▊    | 14408/25000 [05:46<03:58, 44.38it/s]

Ep 14400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 58%|█████▊    | 14508/25000 [05:48<03:52, 45.22it/s]

Ep 14500/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.1, First Action 9


 58%|█████▊    | 14608/25000 [05:50<03:50, 45.09it/s]

Ep 14600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 59%|█████▉    | 14705/25000 [05:53<04:43, 36.32it/s]

Ep 14700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 9


 59%|█████▉    | 14806/25000 [05:56<04:00, 42.43it/s]

Ep 14800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 9


 60%|█████▉    | 14906/25000 [05:58<03:44, 45.02it/s]

Ep 14900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 60%|██████    | 15006/25000 [06:00<03:45, 44.40it/s]

Ep 15000/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 9


 60%|██████    | 15106/25000 [06:02<03:39, 45.11it/s]

Ep 15100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 61%|██████    | 15206/25000 [06:05<03:36, 45.24it/s]

Ep 15200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 61%|██████    | 15306/25000 [06:07<04:13, 38.19it/s]

Ep 15300/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.1, First Action 9


 62%|██████▏   | 15405/25000 [06:10<04:38, 34.44it/s]

Ep 15400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 9


 62%|██████▏   | 15505/25000 [06:12<03:26, 46.08it/s]

Ep 15500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 62%|██████▏   | 15605/25000 [06:14<03:31, 44.46it/s]

Ep 15600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 63%|██████▎   | 15705/25000 [06:17<03:24, 45.38it/s]

Ep 15700/25000, Opt. Action: 0, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.2, First Action 9


 63%|██████▎   | 15805/25000 [06:19<03:27, 44.32it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 64%|██████▎   | 15905/25000 [06:21<03:56, 38.53it/s]

Ep 15900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


 64%|██████▍   | 16005/25000 [06:24<04:40, 32.06it/s]

Ep 16000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 64%|██████▍   | 16104/25000 [06:27<03:20, 44.44it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 65%|██████▍   | 16209/25000 [06:29<03:17, 44.48it/s]

Ep 16200/25000, Opt. Action: 9, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 9


 65%|██████▌   | 16309/25000 [06:31<03:16, 44.30it/s]

Ep 16300/25000, Opt. Action: 8, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 9


 66%|██████▌   | 16409/25000 [06:33<03:10, 45.14it/s]

Ep 16400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 66%|██████▌   | 16507/25000 [06:36<03:48, 37.16it/s]

Ep 16500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 66%|██████▋   | 16603/25000 [06:39<04:14, 33.05it/s]

Ep 16600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 9


 67%|██████▋   | 16705/25000 [06:41<03:03, 45.26it/s]

Ep 16700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 67%|██████▋   | 16805/25000 [06:43<02:59, 45.54it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.1, First Action 9


 68%|██████▊   | 16905/25000 [06:46<02:56, 45.76it/s]

Ep 16900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 68%|██████▊   | 17005/25000 [06:48<03:03, 43.68it/s]

Ep 17000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 9


 68%|██████▊   | 17107/25000 [06:50<03:26, 38.25it/s]

Ep 17100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 69%|██████▉   | 17203/25000 [06:53<03:49, 33.94it/s]

Ep 17200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 6


 69%|██████▉   | 17306/25000 [06:56<02:50, 45.02it/s]

Ep 17300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 8


 70%|██████▉   | 17406/25000 [06:58<02:50, 44.65it/s]

Ep 17400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 8


 70%|███████   | 17506/25000 [07:00<02:46, 45.03it/s]

Ep 17500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 6


 70%|███████   | 17606/25000 [07:02<02:41, 45.66it/s]

Ep 17600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.5, First Action 6


 71%|███████   | 17706/25000 [07:05<03:21, 36.11it/s]

Ep 17700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 6


 71%|███████   | 17805/25000 [07:08<03:54, 30.74it/s]

Ep 17800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 72%|███████▏  | 17907/25000 [07:10<02:37, 44.93it/s]

Ep 17900/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.0, First Action 6


 72%|███████▏  | 18007/25000 [07:12<02:34, 45.40it/s]

Ep 18000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 6


 72%|███████▏  | 18107/25000 [07:15<02:34, 44.75it/s]

Ep 18100/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.9, First Action 6


 73%|███████▎  | 18207/25000 [07:17<02:32, 44.67it/s]

Ep 18200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 6


 73%|███████▎  | 18304/25000 [07:19<03:07, 35.62it/s]

Ep 18300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.6, First Action 6


 74%|███████▎  | 18404/25000 [07:22<03:21, 32.68it/s]

Ep 18400/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.7, First Action 6


 74%|███████▍  | 18505/25000 [07:25<02:26, 44.30it/s]

Ep 18500/25000, Opt. Action: 6, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 15.7, First Action 6


 74%|███████▍  | 18605/25000 [07:27<02:25, 44.01it/s]

Ep 18600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 6


 75%|███████▍  | 18705/25000 [07:29<02:17, 45.70it/s]

Ep 18700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 75%|███████▌  | 18805/25000 [07:31<02:19, 44.53it/s]

Ep 18800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 76%|███████▌  | 18907/25000 [07:34<02:44, 36.98it/s]

Ep 18900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 76%|███████▌  | 19005/25000 [07:37<03:04, 32.45it/s]

Ep 19000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 76%|███████▋  | 19108/25000 [07:39<02:16, 43.28it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 2


 77%|███████▋  | 19208/25000 [07:41<02:07, 45.49it/s]

Ep 19200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 77%|███████▋  | 19308/25000 [07:44<02:05, 45.35it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 78%|███████▊  | 19408/25000 [07:46<02:05, 44.57it/s]

Ep 19400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 78%|███████▊  | 19507/25000 [07:48<02:31, 36.14it/s]

Ep 19500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 2


 78%|███████▊  | 19604/25000 [07:51<02:31, 35.52it/s]

Ep 19600/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 79%|███████▉  | 19705/25000 [07:54<01:56, 45.42it/s]

Ep 19700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 79%|███████▉  | 19805/25000 [07:56<01:57, 44.30it/s]

Ep 19800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 80%|███████▉  | 19905/25000 [07:58<01:56, 43.64it/s]

Ep 19900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 2


 80%|████████  | 20005/25000 [08:00<01:49, 45.67it/s]

Ep 20000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 2


 80%|████████  | 20104/25000 [08:03<02:10, 37.55it/s]

Ep 20100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 81%|████████  | 20204/25000 [08:05<02:18, 34.66it/s]

Ep 20200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 81%|████████  | 20307/25000 [08:08<01:46, 44.23it/s]

Ep 20300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 82%|████████▏ | 20407/25000 [08:10<01:43, 44.20it/s]

Ep 20400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 2


 82%|████████▏ | 20507/25000 [08:12<01:40, 44.88it/s]

Ep 20500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 82%|████████▏ | 20607/25000 [08:15<01:39, 44.35it/s]

Ep 20600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 83%|████████▎ | 20707/25000 [08:17<01:49, 39.32it/s]

Ep 20700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 83%|████████▎ | 20804/25000 [08:20<01:51, 37.49it/s]

Ep 20800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 84%|████████▎ | 20906/25000 [08:22<01:32, 44.03it/s]

Ep 20900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 84%|████████▍ | 21006/25000 [08:25<01:28, 45.04it/s]

Ep 21000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 84%|████████▍ | 21106/25000 [08:27<01:29, 43.38it/s]

Ep 21100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 85%|████████▍ | 21206/25000 [08:29<01:24, 45.01it/s]

Ep 21200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 85%|████████▌ | 21306/25000 [08:31<01:21, 45.12it/s]

Ep 21300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 86%|████████▌ | 21404/25000 [08:34<01:38, 36.67it/s]

Ep 21400/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 2


 86%|████████▌ | 21509/25000 [08:37<01:21, 42.87it/s]

Ep 21500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 86%|████████▋ | 21609/25000 [08:39<01:14, 45.23it/s]

Ep 21600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 87%|████████▋ | 21709/25000 [08:41<01:13, 44.79it/s]

Ep 21700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 87%|████████▋ | 21809/25000 [08:44<01:10, 45.33it/s]

Ep 21800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 88%|████████▊ | 21909/25000 [08:46<01:11, 43.30it/s]

Ep 21900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 88%|████████▊ | 22006/25000 [08:49<01:20, 37.31it/s]

Ep 22000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 88%|████████▊ | 22109/25000 [08:51<01:09, 41.56it/s]

Ep 22100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 89%|████████▉ | 22209/25000 [08:54<01:02, 44.79it/s]

Ep 22200/25000, Opt. Action: 2, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 2


 89%|████████▉ | 22309/25000 [08:56<01:00, 44.84it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 90%|████████▉ | 22404/25000 [08:58<01:01, 42.39it/s]

Ep 22400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


 90%|█████████ | 22509/25000 [09:01<00:54, 45.30it/s]

Ep 22500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 2


 90%|█████████ | 22606/25000 [09:03<01:06, 36.21it/s]

Ep 22600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 91%|█████████ | 22703/25000 [09:06<01:08, 33.45it/s]

Ep 22700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 91%|█████████ | 22807/25000 [09:08<00:48, 44.85it/s]

Ep 22800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 2


 92%|█████████▏| 22907/25000 [09:10<00:46, 45.30it/s]

Ep 22900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 3


 92%|█████████▏| 23007/25000 [09:13<00:43, 45.87it/s]

Ep 23000/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.4, First Action 3


 92%|█████████▏| 23107/25000 [09:15<00:42, 44.11it/s]

Ep 23100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 93%|█████████▎| 23206/25000 [09:17<00:50, 35.83it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 16.8, First Action 3


 93%|█████████▎| 23306/25000 [09:20<00:49, 33.91it/s]

Ep 23300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 3


 94%|█████████▎| 23407/25000 [09:23<00:34, 45.72it/s]

Ep 23400/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.7, First Action 3


 94%|█████████▍| 23507/25000 [09:25<00:33, 44.45it/s]

Ep 23500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 3


 94%|█████████▍| 23607/25000 [09:27<00:31, 44.81it/s]

Ep 23600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 95%|█████████▍| 23707/25000 [09:29<00:29, 43.65it/s]

Ep 23700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 95%|█████████▌| 23805/25000 [09:32<00:32, 36.24it/s]

Ep 23800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 96%|█████████▌| 23905/25000 [09:35<00:33, 32.72it/s]

Ep 23900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 3


 96%|█████████▌| 24008/25000 [09:37<00:21, 45.53it/s]

Ep 24000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 3


 96%|█████████▋| 24108/25000 [09:39<00:19, 45.47it/s]

Ep 24100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


 97%|█████████▋| 24208/25000 [09:42<00:17, 45.08it/s]

Ep 24200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 97%|█████████▋| 24308/25000 [09:44<00:15, 43.85it/s]

Ep 24300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 98%|█████████▊| 24405/25000 [09:46<00:17, 34.35it/s]

Ep 24400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


 98%|█████████▊| 24507/25000 [09:49<00:14, 32.93it/s]

Ep 24500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 3


 98%|█████████▊| 24607/25000 [09:52<00:08, 44.53it/s]

Ep 24600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 99%|█████████▉| 24707/25000 [09:54<00:06, 44.95it/s]

Ep 24700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 99%|█████████▉| 24807/25000 [09:56<00:04, 45.15it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


100%|█████████▉| 24907/25000 [09:58<00:02, 45.49it/s]

Ep 24900/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.6, First Action 3


100%|██████████| 25000/25000 [10:01<00:00, 41.59it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3

TEST:


 41%|████      | 123/300 [00:01<00:01, 121.45it/s]

Ep 100/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 3


 71%|███████   | 212/300 [00:01<00:00, 112.58it/s]

Ep 200/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


100%|██████████| 300/300 [00:02<00:00, 114.32it/s]


Ep 300/300, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.2, First Action 3

GAMMA 1 - LR 0.01 - Entropy Decay False


  0%|          | 108/25000 [00:02<09:02, 45.87it/s]

Ep 100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  1%|          | 208/25000 [00:05<09:01, 45.75it/s]

Ep 200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


  1%|          | 308/25000 [00:07<09:13, 44.61it/s]

Ep 300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


  2%|▏         | 408/25000 [00:09<08:58, 45.70it/s]

Ep 400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


  2%|▏         | 503/25000 [00:11<10:13, 39.96it/s]

Ep 500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  2%|▏         | 605/25000 [00:14<11:10, 36.38it/s]

Ep 600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  3%|▎         | 705/25000 [00:17<09:10, 44.15it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  3%|▎         | 805/25000 [00:19<09:00, 44.74it/s]

Ep 800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  4%|▎         | 905/25000 [00:21<09:08, 43.96it/s]

Ep 900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  4%|▍         | 1005/25000 [00:23<08:45, 45.68it/s]

Ep 1000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


  4%|▍         | 1105/25000 [00:26<08:52, 44.87it/s]

Ep 1100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  5%|▍         | 1207/25000 [00:28<10:58, 36.15it/s]

Ep 1200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


  5%|▌         | 1309/25000 [00:31<09:06, 43.33it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


  6%|▌         | 1409/25000 [00:33<08:52, 44.29it/s]

Ep 1400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  6%|▌         | 1509/25000 [00:36<09:00, 43.49it/s]

Ep 1500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 4


  6%|▋         | 1609/25000 [00:38<08:40, 44.95it/s]

Ep 1600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


  7%|▋         | 1709/25000 [00:40<08:43, 44.50it/s]

Ep 1700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  7%|▋         | 1807/25000 [00:43<11:23, 33.95it/s]

Ep 1800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  8%|▊         | 1905/25000 [00:46<09:09, 42.01it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


  8%|▊         | 2005/25000 [00:48<08:34, 44.72it/s]

Ep 2000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  8%|▊         | 2105/25000 [00:50<08:22, 45.52it/s]

Ep 2100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


  9%|▉         | 2205/25000 [00:52<08:22, 45.37it/s]

Ep 2200/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


  9%|▉         | 2305/25000 [00:55<08:27, 44.75it/s]

Ep 2300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 10%|▉         | 2404/25000 [00:57<10:42, 35.18it/s]

Ep 2400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 10%|█         | 2506/25000 [01:00<09:30, 39.44it/s]

Ep 2500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 10%|█         | 2605/25000 [01:02<08:28, 44.00it/s]

Ep 2600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


 11%|█         | 2705/25000 [01:05<08:24, 44.17it/s]

Ep 2700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 11%|█         | 2805/25000 [01:07<08:22, 44.21it/s]

Ep 2800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 12%|█▏        | 2905/25000 [01:09<08:08, 45.24it/s]

Ep 2900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 12%|█▏        | 3004/25000 [01:12<09:48, 37.36it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 12%|█▏        | 3104/25000 [01:14<10:24, 35.07it/s]

Ep 3100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 13%|█▎        | 3207/25000 [01:17<08:08, 44.66it/s]

Ep 3200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 13%|█▎        | 3307/25000 [01:19<08:03, 44.86it/s]

Ep 3300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 14%|█▎        | 3407/25000 [01:21<08:03, 44.70it/s]

Ep 3400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 14%|█▍        | 3507/25000 [01:24<08:09, 43.93it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 14%|█▍        | 3604/25000 [01:26<09:39, 36.92it/s]

Ep 3600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 15%|█▍        | 3704/25000 [01:29<10:22, 34.18it/s]

Ep 3700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 15%|█▌        | 3808/25000 [01:31<08:00, 44.09it/s]

Ep 3800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 16%|█▌        | 3908/25000 [01:34<08:13, 42.73it/s]

Ep 3900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 16%|█▌        | 4008/25000 [01:36<07:45, 45.08it/s]

Ep 4000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 16%|█▋        | 4108/25000 [01:38<07:43, 45.04it/s]

Ep 4100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 17%|█▋        | 4206/25000 [01:41<09:39, 35.88it/s]

Ep 4200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 17%|█▋        | 4306/25000 [01:43<10:09, 33.95it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 18%|█▊        | 4407/25000 [01:46<07:26, 46.10it/s]

Ep 4400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 18%|█▊        | 4507/25000 [01:48<07:33, 45.22it/s]

Ep 4500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 4


 18%|█▊        | 4607/25000 [01:50<07:31, 45.16it/s]

Ep 4600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 19%|█▉        | 4707/25000 [01:53<07:29, 45.14it/s]

Ep 4700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 19%|█▉        | 4806/25000 [01:55<08:57, 37.55it/s]

Ep 4800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 20%|█▉        | 4905/25000 [01:58<09:40, 34.60it/s]

Ep 4900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 20%|██        | 5005/25000 [02:00<07:25, 44.86it/s]

Ep 5000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 20%|██        | 5105/25000 [02:03<07:27, 44.45it/s]

Ep 5100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 21%|██        | 5205/25000 [02:05<07:25, 44.42it/s]

Ep 5200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 21%|██        | 5305/25000 [02:07<07:16, 45.09it/s]

Ep 5300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 22%|██▏       | 5405/25000 [02:09<07:08, 45.74it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 22%|██▏       | 5505/25000 [02:12<08:45, 37.12it/s]

Ep 5500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 22%|██▏       | 5606/25000 [02:15<07:27, 43.37it/s]

Ep 5600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 23%|██▎       | 5706/25000 [02:17<06:59, 45.95it/s]

Ep 5700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 23%|██▎       | 5806/25000 [02:19<07:07, 44.87it/s]

Ep 5800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 24%|██▎       | 5906/25000 [02:22<07:10, 44.33it/s]

Ep 5900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


 24%|██▍       | 6006/25000 [02:24<07:02, 45.00it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 24%|██▍       | 6105/25000 [02:26<08:20, 37.78it/s]

Ep 6100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 25%|██▍       | 6205/25000 [02:29<07:36, 41.18it/s]

Ep 6200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 25%|██▌       | 6305/25000 [02:31<06:48, 45.72it/s]

Ep 6300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 26%|██▌       | 6405/25000 [02:34<07:10, 43.20it/s]

Ep 6400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 26%|██▌       | 6505/25000 [02:36<06:59, 44.05it/s]

Ep 6500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 26%|██▋       | 6605/25000 [02:38<06:45, 45.35it/s]

Ep 6600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 27%|██▋       | 6704/25000 [02:41<07:59, 38.13it/s]

Ep 6700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 27%|██▋       | 6806/25000 [02:44<08:18, 36.49it/s]

Ep 6800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 28%|██▊       | 6906/25000 [02:46<06:37, 45.57it/s]

Ep 6900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 28%|██▊       | 7006/25000 [02:48<06:32, 45.81it/s]

Ep 7000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 4


 28%|██▊       | 7106/25000 [02:51<06:31, 45.65it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 29%|██▉       | 7206/25000 [02:53<06:48, 43.61it/s]

Ep 7200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 29%|██▉       | 7307/25000 [02:56<08:08, 36.25it/s]

Ep 7300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 30%|██▉       | 7408/25000 [02:58<07:54, 37.04it/s]

Ep 7400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 30%|███       | 7507/25000 [03:01<06:42, 43.46it/s]

Ep 7500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 30%|███       | 7607/25000 [03:03<06:24, 45.26it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 31%|███       | 7707/25000 [03:05<06:24, 44.99it/s]

Ep 7700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 31%|███       | 7807/25000 [03:07<06:28, 44.22it/s]

Ep 7800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 32%|███▏      | 7905/25000 [03:10<07:32, 37.76it/s]

Ep 7900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 32%|███▏      | 8006/25000 [03:13<08:04, 35.08it/s]

Ep 8000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 32%|███▏      | 8109/25000 [03:15<06:13, 45.20it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 33%|███▎      | 8204/25000 [03:17<06:23, 43.75it/s]

Ep 8200/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 4


 33%|███▎      | 8309/25000 [03:20<06:17, 44.20it/s]

Ep 8300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 34%|███▎      | 8409/25000 [03:22<06:07, 45.20it/s]

Ep 8400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 34%|███▍      | 8503/25000 [03:24<07:42, 35.69it/s]

Ep 8500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 34%|███▍      | 8606/25000 [03:27<08:23, 32.53it/s]

Ep 8600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 35%|███▍      | 8706/25000 [03:30<06:06, 44.51it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 35%|███▌      | 8806/25000 [03:32<06:03, 44.50it/s]

Ep 8800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 36%|███▌      | 8906/25000 [03:34<05:56, 45.16it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 36%|███▌      | 9006/25000 [03:37<06:07, 43.49it/s]

Ep 9000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 36%|███▋      | 9105/25000 [03:39<07:04, 37.43it/s]

Ep 9100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 37%|███▋      | 9203/25000 [03:42<07:41, 34.20it/s]

Ep 9200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 37%|███▋      | 9305/25000 [03:44<05:47, 45.14it/s]

Ep 9300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 38%|███▊      | 9405/25000 [03:47<05:41, 45.65it/s]

Ep 9400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 38%|███▊      | 9505/25000 [03:49<05:45, 44.83it/s]

Ep 9500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 38%|███▊      | 9605/25000 [03:51<05:36, 45.70it/s]

Ep 9600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 39%|███▉      | 9706/25000 [03:53<07:12, 35.40it/s]

Ep 9700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 39%|███▉      | 9807/25000 [03:56<07:18, 34.61it/s]

Ep 9800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 40%|███▉      | 9906/25000 [03:59<05:39, 44.43it/s]

Ep 9900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 40%|████      | 10006/25000 [04:01<05:38, 44.33it/s]

Ep 10000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 40%|████      | 10106/25000 [04:03<05:28, 45.30it/s]

Ep 10100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 41%|████      | 10206/25000 [04:05<05:21, 45.98it/s]

Ep 10200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 41%|████      | 10305/25000 [04:08<06:32, 37.49it/s]

Ep 10300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 42%|████▏     | 10406/25000 [04:11<06:54, 35.19it/s]

Ep 10400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 42%|████▏     | 10508/25000 [04:13<05:21, 45.12it/s]

Ep 10500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 42%|████▏     | 10608/25000 [04:16<05:35, 42.83it/s]

Ep 10600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 43%|████▎     | 10708/25000 [04:18<05:18, 44.88it/s]

Ep 10700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 43%|████▎     | 10808/25000 [04:20<05:18, 44.57it/s]

Ep 10800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 4


 44%|████▎     | 10903/25000 [04:22<05:18, 44.20it/s]

Ep 10900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 44%|████▍     | 11005/25000 [04:25<06:05, 38.24it/s]

Ep 11000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 44%|████▍     | 11108/25000 [04:28<05:22, 43.08it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 45%|████▍     | 11208/25000 [04:30<05:07, 44.87it/s]

Ep 11200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 45%|████▌     | 11308/25000 [04:32<05:03, 45.19it/s]

Ep 11300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 46%|████▌     | 11408/25000 [04:35<05:07, 44.16it/s]

Ep 11400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 46%|████▌     | 11508/25000 [04:37<04:59, 45.12it/s]

Ep 11500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 46%|████▋     | 11607/25000 [04:40<06:08, 36.39it/s]

Ep 11600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 47%|████▋     | 11708/25000 [04:42<05:13, 42.35it/s]

Ep 11700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 47%|████▋     | 11808/25000 [04:45<05:02, 43.62it/s]

Ep 11800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 48%|████▊     | 11908/25000 [04:47<04:56, 44.15it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 48%|████▊     | 12008/25000 [04:49<04:44, 45.63it/s]

Ep 12000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 48%|████▊     | 12108/25000 [04:51<04:50, 44.33it/s]

Ep 12100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 49%|████▉     | 12206/25000 [04:54<06:01, 35.39it/s]

Ep 12200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 49%|████▉     | 12307/25000 [04:57<05:13, 40.47it/s]

Ep 12300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 50%|████▉     | 12407/25000 [04:59<04:40, 44.85it/s]

Ep 12400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 4


 50%|█████     | 12507/25000 [05:01<04:36, 45.23it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 50%|█████     | 12607/25000 [05:04<04:43, 43.66it/s]

Ep 12600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 4


 51%|█████     | 12707/25000 [05:06<04:36, 44.50it/s]

Ep 12700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 51%|█████     | 12804/25000 [05:09<05:50, 34.80it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 52%|█████▏    | 12905/25000 [05:11<04:47, 42.09it/s]

Ep 12900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 52%|█████▏    | 13005/25000 [05:14<04:40, 42.73it/s]

Ep 13000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 52%|█████▏    | 13105/25000 [05:16<04:30, 43.91it/s]

Ep 13100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 53%|█████▎    | 13205/25000 [05:18<04:23, 44.74it/s]

Ep 13200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 53%|█████▎    | 13305/25000 [05:20<04:12, 46.34it/s]

Ep 13300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 4


 54%|█████▎    | 13405/25000 [05:23<05:22, 35.94it/s]

Ep 13400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 4


 54%|█████▍    | 13507/25000 [05:26<05:15, 36.46it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


 54%|█████▍    | 13607/25000 [05:28<04:20, 43.68it/s]

Ep 13600/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.6, First Action 4


 55%|█████▍    | 13707/25000 [05:30<04:10, 45.13it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 55%|█████▌    | 13807/25000 [05:33<04:11, 44.49it/s]

Ep 13800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 56%|█████▌    | 13907/25000 [05:35<04:05, 45.18it/s]

Ep 13900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 56%|█████▌    | 14005/25000 [05:38<04:56, 37.03it/s]

Ep 14000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 56%|█████▋    | 14105/25000 [05:41<05:17, 34.29it/s]

Ep 14100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 57%|█████▋    | 14209/25000 [05:43<04:03, 44.28it/s]

Ep 14200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 4


 57%|█████▋    | 14309/25000 [05:45<03:56, 45.21it/s]

Ep 14300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 58%|█████▊    | 14409/25000 [05:47<03:56, 44.86it/s]

Ep 14400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 4


 58%|█████▊    | 14504/25000 [05:50<04:00, 43.71it/s]

Ep 14500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 58%|█████▊    | 14606/25000 [05:52<04:34, 37.92it/s]

Ep 14600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 59%|█████▉    | 14706/25000 [05:55<05:04, 33.80it/s]

Ep 14700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 59%|█████▉    | 14806/25000 [05:57<03:48, 44.62it/s]

Ep 14800/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 4


 60%|█████▉    | 14906/25000 [06:00<03:56, 42.67it/s]

Ep 14900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 60%|██████    | 15006/25000 [06:02<03:43, 44.68it/s]

Ep 15000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 60%|██████    | 15106/25000 [06:04<03:40, 44.96it/s]

Ep 15100/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 4


 61%|██████    | 15204/25000 [06:07<04:24, 37.03it/s]

Ep 15200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 61%|██████    | 15305/25000 [06:09<04:41, 34.40it/s]

Ep 15300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 62%|██████▏   | 15407/25000 [06:12<03:37, 44.15it/s]

Ep 15400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 62%|██████▏   | 15507/25000 [06:14<03:33, 44.56it/s]

Ep 15500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 62%|██████▏   | 15607/25000 [06:16<03:41, 42.31it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 63%|██████▎   | 15707/25000 [06:19<03:27, 44.77it/s]

Ep 15700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 63%|██████▎   | 15807/25000 [06:21<04:08, 36.99it/s]

Ep 15800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 64%|██████▎   | 15904/25000 [06:24<04:38, 32.69it/s]

Ep 15900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 64%|██████▍   | 16008/25000 [06:27<03:21, 44.59it/s]

Ep 16000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 64%|██████▍   | 16108/25000 [06:29<03:18, 44.74it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 65%|██████▍   | 16208/25000 [06:31<03:14, 45.20it/s]

Ep 16200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 65%|██████▌   | 16308/25000 [06:33<03:14, 44.65it/s]

Ep 16300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 66%|██████▌   | 16406/25000 [06:36<03:59, 35.95it/s]

Ep 16400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 66%|██████▌   | 16506/25000 [06:38<04:07, 34.33it/s]

Ep 16500/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 4


 66%|██████▋   | 16607/25000 [06:41<03:14, 43.13it/s]

Ep 16600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 67%|██████▋   | 16707/25000 [06:43<03:06, 44.48it/s]

Ep 16700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 67%|██████▋   | 16807/25000 [06:46<03:06, 43.84it/s]

Ep 16800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 68%|██████▊   | 16907/25000 [06:48<03:01, 44.60it/s]

Ep 16900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 4


 68%|██████▊   | 17007/25000 [06:50<03:10, 42.06it/s]

Ep 17000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 68%|██████▊   | 17105/25000 [06:53<03:37, 36.33it/s]

Ep 17100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 69%|██████▉   | 17207/25000 [06:56<03:07, 41.46it/s]

Ep 17200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 69%|██████▉   | 17307/25000 [06:58<02:52, 44.52it/s]

Ep 17300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 4


 70%|██████▉   | 17407/25000 [07:00<02:48, 44.98it/s]

Ep 17400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 70%|███████   | 17507/25000 [07:03<02:49, 44.28it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 70%|███████   | 17607/25000 [07:05<02:45, 44.77it/s]

Ep 17600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 71%|███████   | 17705/25000 [07:07<03:19, 36.61it/s]

Ep 17700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 71%|███████   | 17804/25000 [07:10<03:05, 38.82it/s]

Ep 17800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 72%|███████▏  | 17907/25000 [07:13<02:43, 43.51it/s]

Ep 17900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 72%|███████▏  | 18007/25000 [07:15<02:38, 44.22it/s]

Ep 18000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 72%|███████▏  | 18107/25000 [07:17<02:37, 43.78it/s]

Ep 18100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 4


 73%|███████▎  | 18207/25000 [07:19<02:29, 45.39it/s]

Ep 18200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 4


 73%|███████▎  | 18307/25000 [07:22<02:57, 37.63it/s]

Ep 18300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 74%|███████▎  | 18403/25000 [07:25<03:24, 32.28it/s]

Ep 18400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


 74%|███████▍  | 18509/25000 [07:27<02:25, 44.66it/s]

Ep 18500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 74%|███████▍  | 18609/25000 [07:30<02:25, 43.97it/s]

Ep 18600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 75%|███████▍  | 18709/25000 [07:32<02:21, 44.44it/s]

Ep 18700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 75%|███████▌  | 18809/25000 [07:34<02:27, 41.94it/s]

Ep 18800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 76%|███████▌  | 18905/25000 [07:37<02:51, 35.53it/s]

Ep 18900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 76%|███████▌  | 19007/25000 [07:40<02:47, 35.71it/s]

Ep 19000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 76%|███████▋  | 19105/25000 [07:42<02:12, 44.52it/s]

Ep 19100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 77%|███████▋  | 19205/25000 [07:45<02:15, 42.76it/s]

Ep 19200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 77%|███████▋  | 19305/25000 [07:47<02:09, 43.90it/s]

Ep 19300/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 4


 78%|███████▊  | 19405/25000 [07:49<02:04, 44.80it/s]

Ep 19400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 78%|███████▊  | 19506/25000 [07:52<02:25, 37.67it/s]

Ep 19500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 78%|███████▊  | 19606/25000 [07:55<02:36, 34.40it/s]

Ep 19600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 79%|███████▉  | 19704/25000 [07:57<02:01, 43.43it/s]

Ep 19700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 79%|███████▉  | 19809/25000 [07:59<01:53, 45.57it/s]

Ep 19800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 80%|███████▉  | 19904/25000 [08:02<01:58, 42.85it/s]

Ep 19900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 80%|████████  | 20009/25000 [08:04<01:54, 43.73it/s]

Ep 20000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 80%|████████  | 20105/25000 [08:06<02:18, 35.30it/s]

Ep 20100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 81%|████████  | 20205/25000 [08:09<02:23, 33.47it/s]

Ep 20200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 81%|████████  | 20307/25000 [08:12<01:48, 43.34it/s]

Ep 20300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 82%|████████▏ | 20407/25000 [08:14<01:45, 43.57it/s]

Ep 20400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 4


 82%|████████▏ | 20507/25000 [08:16<01:41, 44.37it/s]

Ep 20500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 82%|████████▏ | 20607/25000 [08:19<01:40, 43.63it/s]

Ep 20600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 83%|████████▎ | 20706/25000 [08:21<02:00, 35.62it/s]

Ep 20700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 83%|████████▎ | 20806/25000 [08:24<02:09, 32.43it/s]

Ep 20800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 84%|████████▎ | 20905/25000 [08:27<01:36, 42.49it/s]

Ep 20900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 84%|████████▍ | 21005/25000 [08:29<01:30, 44.11it/s]

Ep 21000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 84%|████████▍ | 21105/25000 [08:31<01:27, 44.54it/s]

Ep 21100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 85%|████████▍ | 21205/25000 [08:33<01:23, 45.29it/s]

Ep 21200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 85%|████████▌ | 21304/25000 [08:36<01:43, 35.80it/s]

Ep 21300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 86%|████████▌ | 21404/25000 [08:39<01:49, 32.92it/s]

Ep 21400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 86%|████████▌ | 21509/25000 [08:41<01:17, 44.86it/s]

Ep 21500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 86%|████████▋ | 21609/25000 [08:44<01:16, 44.14it/s]

Ep 21600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 87%|████████▋ | 21709/25000 [08:46<01:12, 45.49it/s]

Ep 21700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 87%|████████▋ | 21809/25000 [08:48<01:12, 43.85it/s]

Ep 21800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 88%|████████▊ | 21906/25000 [08:51<01:24, 36.57it/s]

Ep 21900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 88%|████████▊ | 22006/25000 [08:53<01:30, 33.17it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 88%|████████▊ | 22109/25000 [08:56<01:06, 43.75it/s]

Ep 22100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.6, First Action 4


 89%|████████▉ | 22209/25000 [08:58<01:01, 45.03it/s]

Ep 22200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 89%|████████▉ | 22309/25000 [09:01<01:00, 44.55it/s]

Ep 22300/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 4


 90%|████████▉ | 22404/25000 [09:03<00:58, 44.33it/s]

Ep 22400/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.0, First Action 4


 90%|█████████ | 22507/25000 [09:05<01:06, 37.21it/s]

Ep 22500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 4


 90%|█████████ | 22603/25000 [09:08<01:06, 36.29it/s]

Ep 22600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 91%|█████████ | 22705/25000 [09:11<00:51, 44.91it/s]

Ep 22700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 91%|█████████ | 22805/25000 [09:13<00:49, 44.06it/s]

Ep 22800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 4


 92%|█████████▏| 22905/25000 [09:15<00:46, 44.68it/s]

Ep 22900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 92%|█████████▏| 23005/25000 [09:17<00:44, 45.21it/s]

Ep 23000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 92%|█████████▏| 23105/25000 [09:20<00:49, 38.04it/s]

Ep 23100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 93%|█████████▎| 23205/25000 [09:22<00:46, 38.94it/s]

Ep 23200/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 4


 93%|█████████▎| 23305/25000 [09:25<00:38, 43.78it/s]

Ep 23300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 94%|█████████▎| 23405/25000 [09:27<00:35, 44.96it/s]

Ep 23400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.4, First Action 4


 94%|█████████▍| 23505/25000 [09:30<00:33, 44.17it/s]

Ep 23500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 94%|█████████▍| 23605/25000 [09:32<00:31, 44.28it/s]

Ep 23600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 95%|█████████▍| 23705/25000 [09:34<00:29, 43.40it/s]

Ep 23700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 95%|█████████▌| 23804/25000 [09:37<00:34, 34.70it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 96%|█████████▌| 23905/25000 [09:40<00:27, 40.05it/s]

Ep 23900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 4


 96%|█████████▌| 24005/25000 [09:42<00:23, 43.04it/s]

Ep 24000/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 4


 96%|█████████▋| 24105/25000 [09:44<00:19, 45.31it/s]

Ep 24100/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 4


 97%|█████████▋| 24205/25000 [09:47<00:18, 43.80it/s]

Ep 24200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 97%|█████████▋| 24305/25000 [09:49<00:16, 43.36it/s]

Ep 24300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 98%|█████████▊| 24407/25000 [09:52<00:16, 35.66it/s]

Ep 24400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 98%|█████████▊| 24508/25000 [09:54<00:13, 37.25it/s]

Ep 24500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 98%|█████████▊| 24607/25000 [09:57<00:08, 44.62it/s]

Ep 24600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 99%|█████████▉| 24707/25000 [09:59<00:06, 44.97it/s]

Ep 24700/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 4


 99%|█████████▉| 24807/25000 [10:01<00:04, 44.12it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


100%|█████████▉| 24907/25000 [10:03<00:02, 44.49it/s]

Ep 24900/25000, Opt. Action: 4, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.0, First Action 4


100%|██████████| 25000/25000 [10:06<00:00, 41.23it/s]


Ep 25000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4

TEST:


 38%|███▊      | 114/300 [00:01<00:01, 120.55it/s]

Ep 100/300, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 72%|███████▏  | 216/300 [00:02<00:00, 102.49it/s]

Ep 200/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


100%|██████████| 300/300 [00:02<00:00, 102.55it/s]


Ep 300/300, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4

GAMMA 0.8 - LR 0.001 - Entropy Decay True


  0%|          | 109/25000 [00:02<09:14, 44.86it/s]

Ep 100/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 4


  1%|          | 204/25000 [00:04<09:26, 43.76it/s]

Ep 200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 8


  1%|          | 304/25000 [00:06<09:40, 42.51it/s]

Ep 300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  2%|▏         | 409/25000 [00:09<08:51, 46.25it/s]

Ep 400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


  2%|▏         | 505/25000 [00:11<11:05, 36.78it/s]

Ep 500/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.6, First Action 3


  2%|▏         | 605/25000 [00:14<11:21, 35.80it/s]

Ep 600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


  3%|▎         | 707/25000 [00:17<09:11, 44.01it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


  3%|▎         | 807/25000 [00:19<09:11, 43.86it/s]

Ep 800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


  4%|▎         | 907/25000 [00:21<09:05, 44.14it/s]

Ep 900/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.2, First Action 1


  4%|▍         | 1007/25000 [00:23<08:57, 44.62it/s]

Ep 1000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


  4%|▍         | 1103/25000 [00:26<10:44, 37.08it/s]

Ep 1100/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


  5%|▍         | 1207/25000 [00:29<11:19, 35.04it/s]

Ep 1200/25000, Opt. Action: 3, Reward: 7.800000011920929, Cumulative-Regret: 17.19999998807907, AVG100-Regret: 18.4, First Action 1


  5%|▌         | 1308/25000 [00:31<09:03, 43.63it/s]

Ep 1300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  6%|▌         | 1408/25000 [00:33<08:48, 44.62it/s]

Ep 1400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


  6%|▌         | 1508/25000 [00:36<08:40, 45.11it/s]

Ep 1500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


  6%|▋         | 1608/25000 [00:38<08:38, 45.14it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 9


  7%|▋         | 1705/25000 [00:40<10:43, 36.21it/s]

Ep 1700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 6


  7%|▋         | 1807/25000 [00:43<11:20, 34.11it/s]

Ep 1800/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 2


  8%|▊         | 1908/25000 [00:46<08:33, 44.93it/s]

Ep 1900/25000, Opt. Action: 0, Reward: 12.100000001490116, Cumulative-Regret: 12.899999998509884, AVG100-Regret: 17.9, First Action 0


  8%|▊         | 2008/25000 [00:48<08:43, 43.88it/s]

Ep 2000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  8%|▊         | 2108/25000 [00:50<08:29, 44.95it/s]

Ep 2100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 9


  9%|▉         | 2208/25000 [00:53<08:24, 45.14it/s]

Ep 2200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 3


  9%|▉         | 2306/25000 [00:55<09:58, 37.94it/s]

Ep 2300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


 10%|▉         | 2404/25000 [00:58<11:30, 32.72it/s]

Ep 2400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 10%|█         | 2507/25000 [01:00<08:26, 44.41it/s]

Ep 2500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 10%|█         | 2607/25000 [01:03<08:25, 44.33it/s]

Ep 2600/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.3, First Action 6


 11%|█         | 2707/25000 [01:05<08:20, 44.51it/s]

Ep 2700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 11%|█         | 2807/25000 [01:07<08:28, 43.67it/s]

Ep 2800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 12%|█▏        | 2906/25000 [01:09<09:59, 36.86it/s]

Ep 2900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 12%|█▏        | 3006/25000 [01:12<09:57, 36.83it/s]

Ep 3000/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 2


 12%|█▏        | 3108/25000 [01:15<08:38, 42.24it/s]

Ep 3100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 13%|█▎        | 3208/25000 [01:17<08:19, 43.62it/s]

Ep 3200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 3


 13%|█▎        | 3308/25000 [01:20<08:12, 44.04it/s]

Ep 3300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 8


 14%|█▎        | 3408/25000 [01:22<08:09, 44.10it/s]

Ep 3400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 14%|█▍        | 3508/25000 [01:24<08:07, 44.06it/s]

Ep 3500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 14%|█▍        | 3607/25000 [01:27<09:52, 36.10it/s]

Ep 3600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 15%|█▍        | 3704/25000 [01:30<08:15, 42.97it/s]

Ep 3700/25000, Opt. Action: 0, Reward: 8.100000001490116, Cumulative-Regret: 16.899999998509884, AVG100-Regret: 18.4, First Action 10


 15%|█▌        | 3804/25000 [01:32<08:19, 42.42it/s]

Ep 3800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 16%|█▌        | 3909/25000 [01:34<07:57, 44.19it/s]

Ep 3900/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 7


 16%|█▌        | 4009/25000 [01:37<07:58, 43.83it/s]

Ep 4000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 16%|█▋        | 4104/25000 [01:39<07:55, 43.97it/s]

Ep 4100/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.3, First Action 6


 17%|█▋        | 4207/25000 [01:42<09:31, 36.41it/s]

Ep 4200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 17%|█▋        | 4309/25000 [01:45<08:52, 38.85it/s]

Ep 4300/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


 18%|█▊        | 4408/25000 [01:47<08:01, 42.78it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 18%|█▊        | 4508/25000 [01:49<07:49, 43.65it/s]

Ep 4500/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 17.9, First Action 1


 18%|█▊        | 4608/25000 [01:52<07:30, 45.27it/s]

Ep 4600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 19%|█▉        | 4708/25000 [01:54<07:38, 44.21it/s]

Ep 4700/25000, Opt. Action: 4, Reward: 8.5, Cumulative-Regret: 16.5, AVG100-Regret: 18.5, First Action 6


 19%|█▉        | 4804/25000 [01:56<10:06, 33.33it/s]

Ep 4800/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.2, First Action 10


 20%|█▉        | 4904/25000 [01:59<10:05, 33.17it/s]

Ep 4900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 20%|██        | 5007/25000 [02:02<07:32, 44.18it/s]

Ep 5000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 1


 20%|██        | 5107/25000 [02:04<07:33, 43.91it/s]

Ep 5100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 21%|██        | 5207/25000 [02:06<07:21, 44.80it/s]

Ep 5200/25000, Opt. Action: 0, Reward: 8.100000001490116, Cumulative-Regret: 16.899999998509884, AVG100-Regret: 18.3, First Action 2


 21%|██        | 5307/25000 [02:09<07:34, 43.33it/s]

Ep 5300/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.5, First Action 9


 22%|██▏       | 5405/25000 [02:11<08:35, 37.99it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 22%|██▏       | 5503/25000 [02:14<09:43, 33.43it/s]

Ep 5500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 22%|██▏       | 5609/25000 [02:17<07:20, 44.02it/s]

Ep 5600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 23%|██▎       | 5709/25000 [02:19<07:16, 44.23it/s]

Ep 5700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 6


 23%|██▎       | 5809/25000 [02:21<07:21, 43.49it/s]

Ep 5800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 24%|██▎       | 5909/25000 [02:24<07:16, 43.78it/s]

Ep 5900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 24%|██▍       | 6004/25000 [02:26<08:40, 36.48it/s]

Ep 6000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 24%|██▍       | 6104/25000 [02:29<09:45, 32.27it/s]

Ep 6100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 25%|██▍       | 6209/25000 [02:31<07:01, 44.55it/s]

Ep 6200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 7


 25%|██▌       | 6309/25000 [02:34<07:03, 44.10it/s]

Ep 6300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 26%|██▌       | 6404/25000 [02:36<07:05, 43.69it/s]

Ep 6400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 26%|██▌       | 6509/25000 [02:38<07:03, 43.65it/s]

Ep 6500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 5


 26%|██▋       | 6605/25000 [02:41<08:36, 35.60it/s]

Ep 6600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 27%|██▋       | 6705/25000 [02:44<08:23, 36.36it/s]

Ep 6700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 27%|██▋       | 6804/25000 [02:46<07:00, 43.30it/s]

Ep 6800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 28%|██▊       | 6909/25000 [02:49<06:51, 43.93it/s]

Ep 6900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


 28%|██▊       | 7009/25000 [02:51<06:48, 44.04it/s]

Ep 7000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 3


 28%|██▊       | 7109/25000 [02:53<06:46, 44.00it/s]

Ep 7100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 29%|██▉       | 7204/25000 [02:55<06:35, 45.05it/s]

Ep 7200/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 3


 29%|██▉       | 7307/25000 [02:58<07:57, 37.05it/s]

Ep 7300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 0


 30%|██▉       | 7404/25000 [03:01<07:04, 41.43it/s]

Ep 7400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 30%|███       | 7509/25000 [03:03<06:30, 44.80it/s]

Ep 7500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 30%|███       | 7609/25000 [03:06<06:28, 44.82it/s]

Ep 7600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 9


 31%|███       | 7709/25000 [03:08<06:26, 44.70it/s]

Ep 7700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 31%|███       | 7804/25000 [03:10<06:36, 43.38it/s]

Ep 7800/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 5


 32%|███▏      | 7904/25000 [03:13<07:45, 36.73it/s]

Ep 7900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 32%|███▏      | 8007/25000 [03:16<07:01, 40.28it/s]

Ep 8000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 32%|███▏      | 8107/25000 [03:18<06:19, 44.46it/s]

Ep 8100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 33%|███▎      | 8207/25000 [03:20<06:23, 43.84it/s]

Ep 8200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 33%|███▎      | 8307/25000 [03:22<06:12, 44.81it/s]

Ep 8300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 34%|███▎      | 8407/25000 [03:25<06:25, 43.08it/s]

Ep 8400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 34%|███▍      | 8504/25000 [03:28<08:07, 33.85it/s]

Ep 8500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 34%|███▍      | 8608/25000 [03:31<06:50, 39.95it/s]

Ep 8600/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 18.3, First Action 10


 35%|███▍      | 8708/25000 [03:33<06:01, 45.10it/s]

Ep 8700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 35%|███▌      | 8808/25000 [03:35<06:10, 43.72it/s]

Ep 8800/25000, Opt. Action: 6, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 6


 36%|███▌      | 8908/25000 [03:37<06:01, 44.54it/s]

Ep 8900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 36%|███▌      | 9008/25000 [03:40<06:00, 44.41it/s]

Ep 9000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 36%|███▋      | 9107/25000 [03:42<07:24, 35.75it/s]

Ep 9100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 37%|███▋      | 9203/25000 [03:45<07:56, 33.17it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 12.200000002980232, Cumulative-Regret: 12.799999997019768, AVG100-Regret: 17.8, First Action 9


 37%|███▋      | 9306/25000 [03:47<05:53, 44.37it/s]

Ep 9300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 1


 38%|███▊      | 9406/25000 [03:50<05:46, 45.03it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 38%|███▊      | 9506/25000 [03:52<05:45, 44.81it/s]

Ep 9500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 3


 38%|███▊      | 9606/25000 [03:54<05:55, 43.27it/s]

Ep 9600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 2


 39%|███▉      | 9704/25000 [03:57<07:21, 34.61it/s]

Ep 9700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 39%|███▉      | 9805/25000 [04:00<07:33, 33.49it/s]

Ep 9800/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 6


 40%|███▉      | 9908/25000 [04:02<05:42, 44.13it/s]

Ep 9900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 40%|████      | 10008/25000 [04:04<05:31, 45.19it/s]

Ep 10000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


 40%|████      | 10108/25000 [04:07<05:35, 44.38it/s]

Ep 10100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 41%|████      | 10208/25000 [04:09<05:22, 45.87it/s]

Ep 10200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 41%|████      | 10307/25000 [04:11<06:28, 37.86it/s]

Ep 10300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 42%|████▏     | 10404/25000 [04:14<07:26, 32.67it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 42%|████▏     | 10506/25000 [04:17<05:26, 44.43it/s]

Ep 10500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 42%|████▏     | 10606/25000 [04:19<05:18, 45.19it/s]

Ep 10600/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.4, First Action 1


 43%|████▎     | 10706/25000 [04:21<05:25, 43.95it/s]

Ep 10700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 43%|████▎     | 10806/25000 [04:23<05:26, 43.54it/s]

Ep 10800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 44%|████▎     | 10906/25000 [04:26<06:33, 35.79it/s]

Ep 10900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 44%|████▍     | 11006/25000 [04:29<06:57, 33.52it/s]

Ep 11000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 44%|████▍     | 11105/25000 [04:31<05:13, 44.29it/s]

Ep 11100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 45%|████▍     | 11205/25000 [04:34<05:07, 44.92it/s]

Ep 11200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 45%|████▌     | 11305/25000 [04:36<05:09, 44.20it/s]

Ep 11300/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 17.9, First Action 2


 46%|████▌     | 11405/25000 [04:38<05:10, 43.76it/s]

Ep 11400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 46%|████▌     | 11507/25000 [04:41<05:59, 37.53it/s]

Ep 11500/25000, Opt. Action: 2, Reward: 12.300000011920929, Cumulative-Regret: 12.699999988079071, AVG100-Regret: 17.9, First Action 2


 46%|████▋     | 11607/25000 [04:43<06:12, 35.93it/s]

Ep 11600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 47%|████▋     | 11705/25000 [04:46<05:16, 41.98it/s]

Ep 11700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


 47%|████▋     | 11805/25000 [04:48<04:59, 44.07it/s]

Ep 11800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 48%|████▊     | 11905/25000 [04:51<04:57, 44.04it/s]

Ep 11900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 48%|████▊     | 12005/25000 [04:53<04:52, 44.47it/s]

Ep 12000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 48%|████▊     | 12105/25000 [04:55<04:57, 43.35it/s]

Ep 12100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 49%|████▉     | 12205/25000 [04:58<05:51, 36.38it/s]

Ep 12200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 49%|████▉     | 12309/25000 [05:01<05:01, 42.05it/s]

Ep 12300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 50%|████▉     | 12404/25000 [05:03<04:50, 43.32it/s]

Ep 12400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 50%|█████     | 12509/25000 [05:05<04:43, 44.00it/s]

Ep 12500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 50%|█████     | 12609/25000 [05:08<04:35, 44.99it/s]

Ep 12600/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.2, First Action 8


 51%|█████     | 12709/25000 [05:10<04:32, 45.11it/s]

Ep 12700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 51%|█████     | 12806/25000 [05:13<05:35, 36.31it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 52%|█████▏    | 12908/25000 [05:15<05:22, 37.46it/s]

Ep 12900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 52%|█████▏    | 13007/25000 [05:18<04:32, 44.00it/s]

Ep 13000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


 52%|█████▏    | 13107/25000 [05:20<04:34, 43.39it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 53%|█████▎    | 13207/25000 [05:22<04:26, 44.30it/s]

Ep 13200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 53%|█████▎    | 13307/25000 [05:25<04:24, 44.19it/s]

Ep 13300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 2


 54%|█████▎    | 13407/25000 [05:27<05:04, 38.04it/s]

Ep 13400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 54%|█████▍    | 13503/25000 [05:30<05:45, 33.25it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 54%|█████▍    | 13606/25000 [05:32<04:17, 44.31it/s]

Ep 13600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 55%|█████▍    | 13706/25000 [05:35<04:12, 44.79it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 55%|█████▌    | 13806/25000 [05:37<04:13, 44.10it/s]

Ep 13800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 4


 56%|█████▌    | 13906/25000 [05:39<04:10, 44.22it/s]

Ep 13900/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 1


 56%|█████▌    | 14005/25000 [05:42<05:10, 35.37it/s]

Ep 14000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 56%|█████▋    | 14105/25000 [05:45<05:23, 33.64it/s]

Ep 14100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 57%|█████▋    | 14205/25000 [05:47<04:09, 43.24it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 4


 57%|█████▋    | 14305/25000 [05:49<04:00, 44.51it/s]

Ep 14300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 58%|█████▊    | 14405/25000 [05:52<03:55, 45.00it/s]

Ep 14400/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.3, First Action 9


 58%|█████▊    | 14505/25000 [05:54<03:54, 44.77it/s]

Ep 14500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


 58%|█████▊    | 14604/25000 [05:56<04:47, 36.20it/s]

Ep 14600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 6


 59%|█████▉    | 14704/25000 [05:59<05:04, 33.81it/s]

Ep 14700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 59%|█████▉    | 14809/25000 [06:02<03:40, 46.12it/s]

Ep 14800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 60%|█████▉    | 14909/25000 [06:04<03:44, 45.01it/s]

Ep 14900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 60%|██████    | 15009/25000 [06:06<03:40, 45.28it/s]

Ep 15000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 60%|██████    | 15109/25000 [06:09<03:42, 44.48it/s]

Ep 15100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 61%|██████    | 15206/25000 [06:11<04:25, 36.96it/s]

Ep 15200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 61%|██████    | 15303/25000 [06:14<04:53, 33.05it/s]

Ep 15300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 62%|██████▏   | 15405/25000 [06:16<03:40, 43.53it/s]

Ep 15400/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 6


 62%|██████▏   | 15505/25000 [06:19<03:31, 44.90it/s]

Ep 15500/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 62%|██████▏   | 15605/25000 [06:21<03:27, 45.30it/s]

Ep 15600/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.3, First Action 4


 63%|██████▎   | 15705/25000 [06:23<03:39, 42.40it/s]

Ep 15700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 63%|██████▎   | 15804/25000 [06:26<03:59, 38.43it/s]

Ep 15800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 64%|██████▎   | 15904/25000 [06:28<04:02, 37.51it/s]

Ep 15900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 64%|██████▍   | 16005/25000 [06:31<03:27, 43.27it/s]

Ep 16000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 64%|██████▍   | 16105/25000 [06:33<03:18, 44.80it/s]

Ep 16100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 65%|██████▍   | 16205/25000 [06:36<03:14, 45.21it/s]

Ep 16200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 6


 65%|██████▌   | 16305/25000 [06:38<03:21, 43.15it/s]

Ep 16300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 66%|██████▌   | 16405/25000 [06:40<03:31, 40.57it/s]

Ep 16400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 66%|██████▌   | 16506/25000 [06:43<04:03, 34.94it/s]

Ep 16500/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 4


 66%|██████▋   | 16608/25000 [06:46<03:11, 43.81it/s]

Ep 16600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 67%|██████▋   | 16708/25000 [06:48<03:06, 44.44it/s]

Ep 16700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 67%|██████▋   | 16808/25000 [06:50<03:10, 42.91it/s]

Ep 16800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 6


 68%|██████▊   | 16908/25000 [06:53<03:00, 44.78it/s]

Ep 16900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 68%|██████▊   | 17008/25000 [06:55<03:01, 44.10it/s]

Ep 17000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 68%|██████▊   | 17106/25000 [06:58<03:38, 36.09it/s]

Ep 17100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 69%|██████▉   | 17206/25000 [07:01<03:03, 42.42it/s]

Ep 17200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 69%|██████▉   | 17306/25000 [07:03<02:50, 45.13it/s]

Ep 17300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 6


 70%|██████▉   | 17406/25000 [07:05<02:53, 43.79it/s]

Ep 17400/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 17.6, First Action 6


 70%|███████   | 17506/25000 [07:07<02:50, 43.94it/s]

Ep 17500/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.6, First Action 7


 70%|███████   | 17606/25000 [07:10<02:44, 44.85it/s]

Ep 17600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 2


 71%|███████   | 17704/25000 [07:12<03:23, 35.93it/s]

Ep 17700/25000, Opt. Action: 0, Reward: 8.100000001490116, Cumulative-Regret: 16.899999998509884, AVG100-Regret: 18.0, First Action 8


 71%|███████   | 17806/25000 [07:15<03:08, 38.12it/s]

Ep 17800/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 72%|███████▏  | 17906/25000 [07:18<02:41, 43.86it/s]

Ep 17900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 72%|███████▏  | 18006/25000 [07:20<02:41, 43.42it/s]

Ep 18000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 72%|███████▏  | 18106/25000 [07:22<02:36, 43.94it/s]

Ep 18100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 5


 73%|███████▎  | 18206/25000 [07:24<02:34, 44.05it/s]

Ep 18200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 0


 73%|███████▎  | 18306/25000 [07:27<03:10, 35.11it/s]

Ep 18300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


 74%|███████▎  | 18406/25000 [07:30<03:09, 34.85it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 74%|███████▍  | 18509/25000 [07:32<02:26, 44.36it/s]

Ep 18500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 74%|███████▍  | 18609/25000 [07:35<02:23, 44.49it/s]

Ep 18600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 75%|███████▍  | 18709/25000 [07:37<02:20, 44.87it/s]

Ep 18700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 7


 75%|███████▌  | 18804/25000 [07:39<02:23, 43.16it/s]

Ep 18800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 76%|███████▌  | 18903/25000 [07:42<02:50, 35.84it/s]

Ep 18900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 7


 76%|███████▌  | 19004/25000 [07:44<02:54, 34.45it/s]

Ep 19000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 76%|███████▋  | 19108/25000 [07:47<02:18, 42.63it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 77%|███████▋  | 19208/25000 [07:49<02:11, 44.01it/s]

Ep 19200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 77%|███████▋  | 19308/25000 [07:52<02:05, 45.25it/s]

Ep 19300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 78%|███████▊  | 19408/25000 [07:54<02:03, 45.19it/s]

Ep 19400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 78%|███████▊  | 19505/25000 [07:56<02:27, 37.27it/s]

Ep 19500/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 5


 78%|███████▊  | 19603/25000 [07:59<02:30, 35.88it/s]

Ep 19600/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 1


 79%|███████▉  | 19704/25000 [08:01<01:58, 44.62it/s]

Ep 19700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 79%|███████▉  | 19809/25000 [08:04<01:56, 44.60it/s]

Ep 19800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 80%|███████▉  | 19904/25000 [08:06<01:57, 43.31it/s]

Ep 19900/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 2


 80%|████████  | 20009/25000 [08:08<01:50, 45.00it/s]

Ep 20000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 80%|████████  | 20105/25000 [08:11<02:13, 36.56it/s]

Ep 20100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 5


 81%|████████  | 20205/25000 [08:14<02:27, 32.60it/s]

Ep 20200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 81%|████████  | 20305/25000 [08:16<01:49, 42.80it/s]

Ep 20300/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 8


 82%|████████▏ | 20405/25000 [08:18<01:43, 44.50it/s]

Ep 20400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 82%|████████▏ | 20505/25000 [08:21<01:41, 44.14it/s]

Ep 20500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 82%|████████▏ | 20605/25000 [08:23<01:40, 43.90it/s]

Ep 20600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 83%|████████▎ | 20704/25000 [08:25<01:57, 36.68it/s]

Ep 20700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 83%|████████▎ | 20805/25000 [08:28<01:49, 38.31it/s]

Ep 20800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 84%|████████▎ | 20907/25000 [08:31<01:36, 42.49it/s]

Ep 20900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 84%|████████▍ | 21007/25000 [08:33<01:30, 44.02it/s]

Ep 21000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.2, First Action 5


 84%|████████▍ | 21107/25000 [08:36<01:26, 45.16it/s]

Ep 21100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 8


 85%|████████▍ | 21207/25000 [08:38<01:23, 45.37it/s]

Ep 21200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 85%|████████▌ | 21307/25000 [08:40<01:25, 43.27it/s]

Ep 21300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 86%|████████▌ | 21404/25000 [08:43<01:42, 34.97it/s]

Ep 21400/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 5


 86%|████████▌ | 21506/25000 [08:46<01:29, 38.94it/s]

Ep 21500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.2, First Action 3


 86%|████████▋ | 21605/25000 [08:48<01:16, 44.39it/s]

Ep 21600/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 8


 87%|████████▋ | 21705/25000 [08:50<01:13, 44.56it/s]

Ep 21700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.9, First Action 6


 87%|████████▋ | 21805/25000 [08:53<01:12, 44.33it/s]

Ep 21800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 7


 88%|████████▊ | 21905/25000 [08:55<01:09, 44.25it/s]

Ep 21900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 6


 88%|████████▊ | 22005/25000 [08:57<01:24, 35.32it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 3


 88%|████████▊ | 22105/25000 [09:00<01:28, 32.75it/s]

Ep 22100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 89%|████████▉ | 22209/25000 [09:03<01:02, 44.78it/s]

Ep 22200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 89%|████████▉ | 22304/25000 [09:05<01:00, 44.52it/s]

Ep 22300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 90%|████████▉ | 22404/25000 [09:07<01:01, 42.53it/s]

Ep 22400/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 9


 90%|█████████ | 22509/25000 [09:10<00:56, 44.38it/s]

Ep 22500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 90%|█████████ | 22606/25000 [09:12<01:05, 36.29it/s]

Ep 22600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 4


 91%|█████████ | 22706/25000 [09:15<01:09, 32.98it/s]

Ep 22700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 91%|█████████ | 22808/25000 [09:17<00:48, 45.57it/s]

Ep 22800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 5


 92%|█████████▏| 22908/25000 [09:20<00:48, 42.90it/s]

Ep 22900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 92%|█████████▏| 23008/25000 [09:22<00:45, 43.71it/s]

Ep 23000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 1


 92%|█████████▏| 23108/25000 [09:24<00:44, 42.36it/s]

Ep 23100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.7, First Action 0


 93%|█████████▎| 23206/25000 [09:27<00:48, 37.35it/s]

Ep 23200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.2, First Action 7


 93%|█████████▎| 23306/25000 [09:30<00:48, 34.61it/s]

Ep 23300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 6


 94%|█████████▎| 23405/25000 [09:32<00:35, 44.58it/s]

Ep 23400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 16.6, First Action 7


 94%|█████████▍| 23505/25000 [09:35<00:34, 43.85it/s]

Ep 23500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 8


 94%|█████████▍| 23605/25000 [09:37<00:31, 44.58it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.2, First Action 2


 95%|█████████▍| 23705/25000 [09:39<00:29, 43.71it/s]

Ep 23700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 95%|█████████▌| 23803/25000 [09:42<00:33, 35.75it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 5


 96%|█████████▌| 23903/25000 [09:45<00:32, 34.23it/s]

Ep 23900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.7, First Action 2


 96%|█████████▌| 24008/25000 [09:47<00:23, 42.78it/s]

Ep 24000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 16.8, First Action 7


 96%|█████████▋| 24108/25000 [09:50<00:19, 44.69it/s]

Ep 24100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 2


 97%|█████████▋| 24208/25000 [09:52<00:17, 45.04it/s]

Ep 24200/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 1


 97%|█████████▋| 24308/25000 [09:54<00:16, 42.77it/s]

Ep 24300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 7


 98%|█████████▊| 24404/25000 [09:56<00:16, 35.71it/s]

Ep 24400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.9, First Action 1


 98%|█████████▊| 24504/25000 [09:59<00:14, 33.46it/s]

Ep 24500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.3, First Action 2


 98%|█████████▊| 24605/25000 [10:02<00:08, 44.64it/s]

Ep 24600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.0, First Action 2


 99%|█████████▉| 24705/25000 [10:04<00:06, 44.68it/s]

Ep 24700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.1, First Action 7


 99%|█████████▉| 24805/25000 [10:06<00:04, 44.57it/s]

Ep 24800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 2


100%|█████████▉| 24905/25000 [10:09<00:02, 44.60it/s]

Ep 24900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.8, First Action 7


100%|██████████| 25000/25000 [10:11<00:00, 40.88it/s]


Ep 25000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.4, First Action 1

TEST:


 40%|████      | 120/300 [00:01<00:01, 117.55it/s]

Ep 100/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 15.7, First Action 5


 73%|███████▎  | 220/300 [00:01<00:00, 112.32it/s]

Ep 200/300, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 8


100%|██████████| 300/300 [00:02<00:00, 106.65it/s]


Ep 300/300, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 14.6, First Action 9

GAMMA 1 - LR 0.0001 - Entropy Decay True


  0%|          | 109/25000 [00:02<09:05, 45.67it/s]

Ep 100/25000, Opt. Action: 6, Reward: 4.399999976158142, Cumulative-Regret: 20.600000023841858, AVG100-Regret: 18.2, First Action 10


  1%|          | 204/25000 [00:04<09:19, 44.35it/s]

Ep 200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


  1%|          | 309/25000 [00:07<09:23, 43.79it/s]

Ep 300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  2%|▏         | 404/25000 [00:09<09:24, 43.61it/s]

Ep 400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


  2%|▏         | 505/25000 [00:11<10:54, 37.42it/s]

Ep 500/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.4, First Action 5


  2%|▏         | 604/25000 [00:14<11:39, 34.85it/s]

Ep 600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 7


  3%|▎         | 708/25000 [00:17<09:12, 43.95it/s]

Ep 700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


  3%|▎         | 808/25000 [00:19<09:16, 43.48it/s]

Ep 800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


  4%|▎         | 908/25000 [00:21<09:00, 44.61it/s]

Ep 900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


  4%|▍         | 1008/25000 [00:24<08:57, 44.67it/s]

Ep 1000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


  4%|▍         | 1108/25000 [00:26<09:13, 43.16it/s]

Ep 1100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


  5%|▍         | 1207/25000 [00:29<10:23, 38.15it/s]

Ep 1200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 3


  5%|▌         | 1306/25000 [00:32<09:46, 40.38it/s]

Ep 1300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  6%|▌         | 1406/25000 [00:34<08:52, 44.29it/s]

Ep 1400/25000, Opt. Action: 1, Reward: 4.200000002980232, Cumulative-Regret: 20.799999997019768, AVG100-Regret: 18.4, First Action 2


  6%|▌         | 1506/25000 [00:36<08:50, 44.24it/s]

Ep 1500/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 18.1, First Action 1


  6%|▋         | 1606/25000 [00:38<08:50, 44.09it/s]

Ep 1600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


  7%|▋         | 1706/25000 [00:41<08:43, 44.52it/s]

Ep 1700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


  7%|▋         | 1805/25000 [00:43<11:05, 34.83it/s]

Ep 1800/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.4, First Action 0


  8%|▊         | 1902/25000 [00:46<11:21, 33.89it/s]

Ep 1900/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.4, First Action 10


  8%|▊         | 2005/25000 [00:49<08:53, 43.11it/s]

Ep 2000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  8%|▊         | 2105/25000 [00:51<08:36, 44.35it/s]

Ep 2100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


  9%|▉         | 2205/25000 [00:53<08:32, 44.44it/s]

Ep 2200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


  9%|▉         | 2305/25000 [00:55<08:37, 43.87it/s]

Ep 2300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 10%|▉         | 2407/25000 [00:58<10:33, 35.65it/s]

Ep 2400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 10%|█         | 2504/25000 [01:01<11:00, 34.05it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 10%|█         | 2604/25000 [01:03<08:59, 41.52it/s]

Ep 2600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 11%|█         | 2709/25000 [01:06<08:29, 43.74it/s]

Ep 2700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 11%|█         | 2809/25000 [01:08<08:20, 44.33it/s]

Ep 2800/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.4, First Action 8


 12%|█▏        | 2904/25000 [01:10<08:16, 44.50it/s]

Ep 2900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


 12%|█▏        | 3005/25000 [01:13<10:20, 35.45it/s]

Ep 3000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 12%|█▏        | 3106/25000 [01:15<11:07, 32.78it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.0, First Action 1


 13%|█▎        | 3205/25000 [01:18<08:15, 43.98it/s]

Ep 3200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 13%|█▎        | 3305/25000 [01:20<08:21, 43.25it/s]

Ep 3300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


 14%|█▎        | 3405/25000 [01:23<08:08, 44.19it/s]

Ep 3400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 14%|█▍        | 3505/25000 [01:25<08:03, 44.45it/s]

Ep 3500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 14%|█▍        | 3606/25000 [01:27<10:37, 33.55it/s]

Ep 3600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 10


 15%|█▍        | 3703/25000 [01:30<10:26, 34.00it/s]

Ep 3700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 15%|█▌        | 3806/25000 [01:33<08:01, 44.00it/s]

Ep 3800/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.5, First Action 10


 16%|█▌        | 3906/25000 [01:35<07:45, 45.30it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 16%|█▌        | 4006/25000 [01:37<08:09, 42.88it/s]

Ep 4000/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 16%|█▋        | 4106/25000 [01:40<07:58, 43.68it/s]

Ep 4100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 17%|█▋        | 4204/25000 [01:42<09:31, 36.40it/s]

Ep 4200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 2


 17%|█▋        | 4304/25000 [01:45<10:00, 34.44it/s]

Ep 4300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


 18%|█▊        | 4405/25000 [01:48<07:54, 43.36it/s]

Ep 4400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 18%|█▊        | 4505/25000 [01:50<07:51, 43.47it/s]

Ep 4500/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.4, First Action 2


 18%|█▊        | 4605/25000 [01:52<07:44, 43.87it/s]

Ep 4600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 9


 19%|█▉        | 4705/25000 [01:55<07:42, 43.85it/s]

Ep 4700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 19%|█▉        | 4805/25000 [01:57<08:37, 39.05it/s]

Ep 4800/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 8


 20%|█▉        | 4903/25000 [02:00<09:00, 37.18it/s]

Ep 4900/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 20%|██        | 5006/25000 [02:02<07:48, 42.69it/s]

Ep 5000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 20%|██        | 5106/25000 [02:05<07:33, 43.90it/s]

Ep 5100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 21%|██        | 5206/25000 [02:07<07:24, 44.49it/s]

Ep 5200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 21%|██        | 5306/25000 [02:09<07:36, 43.12it/s]

Ep 5300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 22%|██▏       | 5406/25000 [02:12<07:23, 44.14it/s]

Ep 5400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 0


 22%|██▏       | 5506/25000 [02:14<09:04, 35.77it/s]

Ep 5500/25000, Opt. Action: 1, Reward: 16.200000002980232, Cumulative-Regret: 8.799999997019768, AVG100-Regret: 18.2, First Action 1


 22%|██▏       | 5608/25000 [02:17<08:19, 38.82it/s]

Ep 5600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 23%|██▎       | 5707/25000 [02:20<07:24, 43.42it/s]

Ep 5700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 6


 23%|██▎       | 5807/25000 [02:22<07:10, 44.57it/s]

Ep 5800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 0


 24%|██▎       | 5907/25000 [02:24<07:16, 43.76it/s]

Ep 5900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 24%|██▍       | 6007/25000 [02:26<07:17, 43.46it/s]

Ep 6000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 24%|██▍       | 6105/25000 [02:29<08:50, 35.65it/s]

Ep 6100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 7


 25%|██▍       | 6205/25000 [02:32<09:03, 34.61it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 25%|██▌       | 6309/25000 [02:34<07:04, 44.03it/s]

Ep 6300/25000, Opt. Action: 2, Reward: 7.600000023841858, Cumulative-Regret: 17.399999976158142, AVG100-Regret: 17.6, First Action 2


 26%|██▌       | 6409/25000 [02:37<06:52, 45.09it/s]

Ep 6400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 26%|██▌       | 6504/25000 [02:39<07:01, 43.88it/s]

Ep 6500/25000, Opt. Action: 0, Reward: 12.100000001490116, Cumulative-Regret: 12.899999998509884, AVG100-Regret: 18.5, First Action 0


 26%|██▋       | 6609/25000 [02:41<06:55, 44.28it/s]

Ep 6600/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.2, First Action 1


 27%|██▋       | 6706/25000 [02:44<07:58, 38.27it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 27%|██▋       | 6806/25000 [02:47<08:45, 34.63it/s]

Ep 6800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 28%|██▊       | 6907/25000 [02:49<06:45, 44.57it/s]

Ep 6900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 28%|██▊       | 7007/25000 [02:51<06:50, 43.87it/s]

Ep 7000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 28%|██▊       | 7107/25000 [02:54<06:54, 43.18it/s]

Ep 7100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 29%|██▉       | 7207/25000 [02:56<06:50, 43.39it/s]

Ep 7200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 29%|██▉       | 7307/25000 [02:58<08:10, 36.04it/s]

Ep 7300/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 30%|██▉       | 7406/25000 [03:01<08:32, 34.35it/s]

Ep 7400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 30%|███       | 7508/25000 [03:04<06:37, 43.98it/s]

Ep 7500/25000, Opt. Action: 3, Reward: 8.400000005960464, Cumulative-Regret: 16.599999994039536, AVG100-Regret: 17.6, First Action 3


 30%|███       | 7608/25000 [03:06<06:39, 43.57it/s]

Ep 7600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 1


 31%|███       | 7708/25000 [03:08<06:40, 43.18it/s]

Ep 7700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 31%|███       | 7808/25000 [03:11<06:27, 44.35it/s]

Ep 7800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 32%|███▏      | 7903/25000 [03:13<07:11, 39.58it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 0


 32%|███▏      | 8006/25000 [03:16<07:31, 37.62it/s]

Ep 8000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 32%|███▏      | 8107/25000 [03:19<06:27, 43.55it/s]

Ep 8100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 7


 33%|███▎      | 8207/25000 [03:21<06:20, 44.11it/s]

Ep 8200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 7


 33%|███▎      | 8307/25000 [03:23<06:25, 43.34it/s]

Ep 8300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 34%|███▎      | 8407/25000 [03:25<06:14, 44.30it/s]

Ep 8400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 34%|███▍      | 8507/25000 [03:28<06:07, 44.91it/s]

Ep 8500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 34%|███▍      | 8604/25000 [03:30<07:48, 35.01it/s]

Ep 8600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 35%|███▍      | 8708/25000 [03:33<06:59, 38.82it/s]

Ep 8700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 35%|███▌      | 8807/25000 [03:36<06:17, 42.94it/s]

Ep 8800/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.5, First Action 9


 36%|███▌      | 8907/25000 [03:38<06:03, 44.23it/s]

Ep 8900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 36%|███▌      | 9007/25000 [03:40<06:19, 42.18it/s]

Ep 9000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 36%|███▋      | 9107/25000 [03:43<05:55, 44.68it/s]

Ep 9100/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.3, First Action 3


 37%|███▋      | 9205/25000 [03:45<07:30, 35.09it/s]

Ep 9200/25000, Opt. Action: 1, Reward: 8.200000002980232, Cumulative-Regret: 16.799999997019768, AVG100-Regret: 18.3, First Action 3


 37%|███▋      | 9305/25000 [03:48<08:02, 32.54it/s]

Ep 9300/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 38%|███▊      | 9409/25000 [03:51<05:50, 44.44it/s]

Ep 9400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 38%|███▊      | 9509/25000 [03:53<05:50, 44.15it/s]

Ep 9500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


 38%|███▊      | 9609/25000 [03:55<05:42, 44.90it/s]

Ep 9600/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.2, First Action 3


 39%|███▉      | 9704/25000 [03:57<06:00, 42.46it/s]

Ep 9700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


 39%|███▉      | 9807/25000 [04:00<06:30, 38.95it/s]

Ep 9800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 40%|███▉      | 9903/25000 [04:03<07:42, 32.66it/s]

Ep 9900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 3


 40%|████      | 10004/25000 [04:05<05:44, 43.49it/s]

Ep 10000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.3, First Action 5


 40%|████      | 10109/25000 [04:07<05:38, 43.93it/s]

Ep 10100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 3


 41%|████      | 10204/25000 [04:10<05:44, 42.99it/s]

Ep 10200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 41%|████      | 10309/25000 [04:12<05:32, 44.22it/s]

Ep 10300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 7


 42%|████▏     | 10404/25000 [04:15<06:52, 35.40it/s]

Ep 10400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 42%|████▏     | 10505/25000 [04:17<07:22, 32.78it/s]

Ep 10500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 42%|████▏     | 10608/25000 [04:20<05:25, 44.21it/s]

Ep 10600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 43%|████▎     | 10708/25000 [04:22<05:33, 42.91it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 3


 43%|████▎     | 10808/25000 [04:25<05:23, 43.92it/s]

Ep 10800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


 44%|████▎     | 10908/25000 [04:27<05:13, 44.97it/s]

Ep 10900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


 44%|████▍     | 11003/25000 [04:29<06:17, 37.06it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 44%|████▍     | 11107/25000 [04:32<06:14, 37.07it/s]

Ep 11100/25000, Opt. Action: 0, Reward: 8.100000001490116, Cumulative-Regret: 16.899999998509884, AVG100-Regret: 18.5, First Action 5


 45%|████▍     | 11207/25000 [04:35<05:37, 40.88it/s]

Ep 11200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 7


 45%|████▌     | 11307/25000 [04:37<05:10, 44.03it/s]

Ep 11300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 46%|████▌     | 11407/25000 [04:39<05:08, 44.01it/s]

Ep 11400/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.8, First Action 6


 46%|████▌     | 11507/25000 [04:42<05:02, 44.60it/s]

Ep 11500/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.4, First Action 2


 46%|████▋     | 11607/25000 [04:44<04:57, 45.05it/s]

Ep 11600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 47%|████▋     | 11704/25000 [04:47<06:08, 36.10it/s]

Ep 11700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 47%|████▋     | 11806/25000 [04:50<05:52, 37.41it/s]

Ep 11800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 48%|████▊     | 11905/25000 [04:52<04:57, 44.09it/s]

Ep 11900/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 5


 48%|████▊     | 12005/25000 [04:54<04:46, 45.31it/s]

Ep 12000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 4


 48%|████▊     | 12105/25000 [04:56<04:54, 43.77it/s]

Ep 12100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 49%|████▉     | 12205/25000 [04:59<04:49, 44.19it/s]

Ep 12200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 4


 49%|████▉     | 12306/25000 [05:01<05:30, 38.43it/s]

Ep 12300/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.1, First Action 1


 50%|████▉     | 12406/25000 [05:04<06:21, 32.99it/s]

Ep 12400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 0


 50%|█████     | 12506/25000 [05:07<04:49, 43.12it/s]

Ep 12500/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.7, First Action 4


 50%|█████     | 12606/25000 [05:09<04:41, 44.11it/s]

Ep 12600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 9


 51%|█████     | 12706/25000 [05:11<04:38, 44.21it/s]

Ep 12700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 51%|█████     | 12806/25000 [05:13<04:43, 42.95it/s]

Ep 12800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 52%|█████▏    | 12905/25000 [05:16<05:23, 37.44it/s]

Ep 12900/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 4


 52%|█████▏    | 13005/25000 [05:19<06:12, 32.23it/s]

Ep 13000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 0


 52%|█████▏    | 13106/25000 [05:21<04:29, 44.15it/s]

Ep 13100/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 4


 53%|█████▎    | 13206/25000 [05:24<04:23, 44.74it/s]

Ep 13200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 9


 53%|█████▎    | 13306/25000 [05:26<04:31, 43.09it/s]

Ep 13300/25000, Opt. Action: 6, Reward: 8.699999988079071, Cumulative-Regret: 16.30000001192093, AVG100-Regret: 18.3, First Action 10


 54%|█████▎    | 13406/25000 [05:28<04:27, 43.41it/s]

Ep 13400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 54%|█████▍    | 13506/25000 [05:31<05:20, 35.83it/s]

Ep 13500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 54%|█████▍    | 13606/25000 [05:34<05:26, 34.86it/s]

Ep 13600/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 7


 55%|█████▍    | 13705/25000 [05:36<04:25, 42.51it/s]

Ep 13700/25000, Opt. Action: 8, Reward: 12.899999976158142, Cumulative-Regret: 12.100000023841858, AVG100-Regret: 18.2, First Action 8


 55%|█████▌    | 13805/25000 [05:38<04:08, 45.05it/s]

Ep 13800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 4


 56%|█████▌    | 13905/25000 [05:41<04:11, 44.09it/s]

Ep 13900/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.4, First Action 5


 56%|█████▌    | 14005/25000 [05:43<04:10, 43.98it/s]

Ep 14000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 56%|█████▋    | 14107/25000 [05:46<05:09, 35.18it/s]

Ep 14100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 2


 57%|█████▋    | 14204/25000 [05:48<05:21, 33.55it/s]

Ep 14200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 6


 57%|█████▋    | 14306/25000 [05:51<04:01, 44.30it/s]

Ep 14300/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 6


 58%|█████▊    | 14406/25000 [05:53<04:04, 43.39it/s]

Ep 14400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 58%|█████▊    | 14506/25000 [05:56<03:59, 43.77it/s]

Ep 14500/25000, Opt. Action: 6, Reward: 8.399999976158142, Cumulative-Regret: 16.600000023841858, AVG100-Regret: 18.1, First Action 10


 58%|█████▊    | 14606/25000 [05:58<03:55, 44.06it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 3


 59%|█████▉    | 14706/25000 [06:00<03:55, 43.69it/s]

Ep 14700/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 3


 59%|█████▉    | 14804/25000 [06:03<04:41, 36.20it/s]

Ep 14800/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 3


 60%|█████▉    | 14904/25000 [06:06<04:43, 35.59it/s]

Ep 14900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 60%|██████    | 15008/25000 [06:08<03:50, 43.37it/s]

Ep 15000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 60%|██████    | 15108/25000 [06:11<03:45, 43.92it/s]

Ep 15100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 61%|██████    | 15208/25000 [06:13<03:39, 44.60it/s]

Ep 15200/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.1, First Action 10


 61%|██████    | 15308/25000 [06:15<03:42, 43.48it/s]

Ep 15300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 8


 62%|██████▏   | 15403/25000 [06:18<04:25, 36.10it/s]

Ep 15400/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 62%|██████▏   | 15503/25000 [06:21<04:58, 31.77it/s]

Ep 15500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 8


 62%|██████▏   | 15605/25000 [06:23<03:30, 44.54it/s]

Ep 15600/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 17.8, First Action 10


 63%|██████▎   | 15705/25000 [06:25<03:32, 43.65it/s]

Ep 15700/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 18.6, First Action 10


 63%|██████▎   | 15805/25000 [06:28<03:35, 42.61it/s]

Ep 15800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 64%|██████▎   | 15905/25000 [06:30<03:27, 43.77it/s]

Ep 15900/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 4


 64%|██████▍   | 16003/25000 [06:33<04:08, 36.26it/s]

Ep 16000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 64%|██████▍   | 16107/25000 [06:35<04:13, 35.09it/s]

Ep 16100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 65%|██████▍   | 16205/25000 [06:38<03:19, 44.08it/s]

Ep 16200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 65%|██████▌   | 16305/25000 [06:40<03:25, 42.30it/s]

Ep 16300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 4


 66%|██████▌   | 16405/25000 [06:42<03:18, 43.22it/s]

Ep 16400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 66%|██████▌   | 16505/25000 [06:45<03:12, 44.11it/s]

Ep 16500/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 1


 66%|██████▋   | 16604/25000 [06:47<03:59, 35.10it/s]

Ep 16600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 67%|██████▋   | 16704/25000 [06:50<04:18, 32.04it/s]

Ep 16700/25000, Opt. Action: 4, Reward: 8.5, Cumulative-Regret: 16.5, AVG100-Regret: 18.4, First Action 3


 67%|██████▋   | 16807/25000 [06:53<03:09, 43.13it/s]

Ep 16800/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 68%|██████▊   | 16907/25000 [06:55<03:07, 43.07it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 68%|██████▊   | 17007/25000 [06:58<03:07, 42.55it/s]

Ep 17000/25000, Opt. Action: 3, Reward: 8.400000005960464, Cumulative-Regret: 16.599999994039536, AVG100-Regret: 18.3, First Action 2


 68%|██████▊   | 17107/25000 [07:00<02:58, 44.12it/s]

Ep 17100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 69%|██████▉   | 17203/25000 [07:02<03:43, 34.92it/s]

Ep 17200/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 2


 69%|██████▉   | 17306/25000 [07:05<03:50, 33.45it/s]

Ep 17300/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.3, First Action 1


 70%|██████▉   | 17409/25000 [07:08<02:50, 44.64it/s]

Ep 17400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 6


 70%|███████   | 17504/25000 [07:10<02:50, 43.92it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 70%|███████   | 17609/25000 [07:12<02:41, 45.66it/s]

Ep 17600/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.7, First Action 10


 71%|███████   | 17704/25000 [07:15<02:46, 43.76it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 16.200000002980232, Cumulative-Regret: 8.799999997019768, AVG100-Regret: 18.3, First Action 6


 71%|███████   | 17806/25000 [07:17<03:13, 37.13it/s]

Ep 17800/25000, Opt. Action: 7, Reward: 8.800000011920929, Cumulative-Regret: 16.19999998807907, AVG100-Regret: 18.4, First Action 6


 72%|███████▏  | 17903/25000 [07:20<03:15, 36.29it/s]

Ep 17900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 72%|███████▏  | 18009/25000 [07:23<02:39, 43.72it/s]

Ep 18000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 72%|███████▏  | 18109/25000 [07:25<02:34, 44.48it/s]

Ep 18100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 73%|███████▎  | 18204/25000 [07:27<02:36, 43.36it/s]

Ep 18200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 7


 73%|███████▎  | 18309/25000 [07:30<02:31, 44.20it/s]

Ep 18300/25000, Opt. Action: 0, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 6


 74%|███████▎  | 18404/25000 [07:32<02:51, 38.46it/s]

Ep 18400/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.2, First Action 7


 74%|███████▍  | 18505/25000 [07:35<02:54, 37.12it/s]

Ep 18500/25000, Opt. Action: 8, Reward: 4.799999952316284, Cumulative-Regret: 20.200000047683716, AVG100-Regret: 18.3, First Action 1


 74%|███████▍  | 18606/25000 [07:38<02:29, 42.84it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 1


 75%|███████▍  | 18706/25000 [07:40<02:25, 43.18it/s]

Ep 18700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 75%|███████▌  | 18806/25000 [07:42<02:29, 41.30it/s]

Ep 18800/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.5, First Action 1


 76%|███████▌  | 18906/25000 [07:44<02:18, 44.15it/s]

Ep 18900/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 1


 76%|███████▌  | 19006/25000 [07:47<02:23, 41.75it/s]

Ep 19000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 1


 76%|███████▋  | 19104/25000 [07:49<02:46, 35.39it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.8, First Action 6


 77%|███████▋  | 19208/25000 [07:52<02:23, 40.34it/s]

Ep 19200/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.0, First Action 0


 77%|███████▋  | 19308/25000 [07:55<02:10, 43.71it/s]

Ep 19300/25000, Opt. Action: 0, Reward: 4.100000001490116, Cumulative-Regret: 20.899999998509884, AVG100-Regret: 18.6, First Action 1


 78%|███████▊  | 19408/25000 [07:57<02:09, 43.29it/s]

Ep 19400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 78%|███████▊  | 19508/25000 [07:59<02:04, 44.04it/s]

Ep 19500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 10


 78%|███████▊  | 19608/25000 [08:02<02:00, 44.70it/s]

Ep 19600/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 18.7, First Action 1


 79%|███████▉  | 19704/25000 [08:04<02:30, 35.23it/s]

Ep 19700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 3


 79%|███████▉  | 19804/25000 [08:07<02:42, 31.91it/s]

Ep 19800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 8


 80%|███████▉  | 19907/25000 [08:09<01:58, 42.95it/s]

Ep 19900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 1


 80%|████████  | 20007/25000 [08:12<01:52, 44.24it/s]

Ep 20000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.9, First Action 3


 80%|████████  | 20107/25000 [08:14<01:54, 42.79it/s]

Ep 20100/25000, Opt. Action: 0, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.3, First Action 0


 81%|████████  | 20207/25000 [08:16<01:49, 43.71it/s]

Ep 20200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 81%|████████  | 20307/25000 [08:19<02:07, 36.89it/s]

Ep 20300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 82%|████████▏ | 20404/25000 [08:22<02:22, 32.20it/s]

Ep 20400/25000, Opt. Action: 5, Reward: 3.4000000953674316, Cumulative-Regret: 21.59999990463257, AVG100-Regret: 19.1, First Action 3


 82%|████████▏ | 20508/25000 [08:24<01:43, 43.40it/s]

Ep 20500/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.6, First Action 0


 82%|████████▏ | 20608/25000 [08:27<01:37, 45.03it/s]

Ep 20600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 83%|████████▎ | 20708/25000 [08:29<01:37, 44.19it/s]

Ep 20700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 83%|████████▎ | 20808/25000 [08:31<01:37, 43.03it/s]

Ep 20800/25000, Opt. Action: 8, Reward: 4.899999976158142, Cumulative-Regret: 20.100000023841858, AVG100-Regret: 18.4, First Action 4


 84%|████████▎ | 20905/25000 [08:34<01:51, 36.57it/s]

Ep 20900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 84%|████████▍ | 21005/25000 [08:37<01:52, 35.43it/s]

Ep 21000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 4


 84%|████████▍ | 21108/25000 [08:39<01:30, 42.78it/s]

Ep 21100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 7


 85%|████████▍ | 21208/25000 [08:42<01:25, 44.18it/s]

Ep 21200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 85%|████████▌ | 21308/25000 [08:44<01:24, 43.89it/s]

Ep 21300/25000, Opt. Action: 7, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 18.5, First Action 8


 86%|████████▌ | 21408/25000 [08:46<01:22, 43.79it/s]

Ep 21400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 86%|████████▌ | 21506/25000 [08:49<01:40, 34.65it/s]

Ep 21500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 86%|████████▋ | 21606/25000 [08:52<01:41, 33.55it/s]

Ep 21600/25000, Opt. Action: 6, Reward: 8.699999988079071, Cumulative-Regret: 16.30000001192093, AVG100-Regret: 18.7, First Action 10


 87%|████████▋ | 21705/25000 [08:54<01:17, 42.54it/s]

Ep 21700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 3


 87%|████████▋ | 21805/25000 [08:56<01:11, 44.41it/s]

Ep 21800/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.3, First Action 10


 88%|████████▊ | 21905/25000 [08:59<01:09, 44.67it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 5


 88%|████████▊ | 22005/25000 [09:01<01:06, 44.95it/s]

Ep 22000/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 88%|████████▊ | 22105/25000 [09:03<01:20, 35.89it/s]

Ep 22100/25000, Opt. Action: 0, Reward: 4.100000001490116, Cumulative-Regret: 20.899999998509884, AVG100-Regret: 18.2, First Action 4


 89%|████████▉ | 22206/25000 [09:06<01:24, 33.23it/s]

Ep 22200/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 7


 89%|████████▉ | 22305/25000 [09:09<01:03, 42.58it/s]

Ep 22300/25000, Opt. Action: 4, Reward: 8.5, Cumulative-Regret: 16.5, AVG100-Regret: 17.9, First Action 10


 90%|████████▉ | 22405/25000 [09:11<00:59, 43.97it/s]

Ep 22400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 90%|█████████ | 22505/25000 [09:14<00:56, 44.13it/s]

Ep 22500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 90%|█████████ | 22605/25000 [09:16<00:55, 43.50it/s]

Ep 22600/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 91%|█████████ | 22705/25000 [09:18<00:58, 39.18it/s]

Ep 22700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 91%|█████████ | 22805/25000 [09:21<00:58, 37.75it/s]

Ep 22800/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 18.6, First Action 3


 92%|█████████▏| 22909/25000 [09:24<00:51, 40.57it/s]

Ep 22900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 92%|█████████▏| 23009/25000 [09:26<00:45, 44.00it/s]

Ep 23000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 92%|█████████▏| 23104/25000 [09:29<00:42, 44.21it/s]

Ep 23100/25000, Opt. Action: 6, Reward: 4.399999976158142, Cumulative-Regret: 20.600000023841858, AVG100-Regret: 18.2, First Action 10


 93%|█████████▎| 23204/25000 [09:31<00:41, 42.89it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 93%|█████████▎| 23304/25000 [09:33<00:40, 41.85it/s]

Ep 23300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 94%|█████████▎| 23405/25000 [09:36<00:46, 34.30it/s]

Ep 23400/25000, Opt. Action: 0, Reward: 8.100000001490116, Cumulative-Regret: 16.899999998509884, AVG100-Regret: 18.4, First Action 4


 94%|█████████▍| 23505/25000 [09:39<00:45, 32.80it/s]

Ep 23500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 94%|█████████▍| 23607/25000 [09:41<00:31, 43.70it/s]

Ep 23600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 95%|█████████▍| 23707/25000 [09:43<00:29, 43.20it/s]

Ep 23700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


 95%|█████████▌| 23807/25000 [09:46<00:26, 45.06it/s]

Ep 23800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 96%|█████████▌| 23907/25000 [09:48<00:24, 45.07it/s]

Ep 23900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 1


 96%|█████████▌| 24005/25000 [09:51<00:28, 34.84it/s]

Ep 24000/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 4


 96%|█████████▋| 24105/25000 [09:54<00:27, 33.09it/s]

Ep 24100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 97%|█████████▋| 24206/25000 [09:56<00:18, 43.33it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 3


 97%|█████████▋| 24306/25000 [09:58<00:16, 43.17it/s]

Ep 24300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 98%|█████████▊| 24406/25000 [10:01<00:13, 44.43it/s]

Ep 24400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 98%|█████████▊| 24506/25000 [10:03<00:10, 45.11it/s]

Ep 24500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 98%|█████████▊| 24606/25000 [10:05<00:11, 33.82it/s]

Ep 24600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 99%|█████████▉| 24707/25000 [10:08<00:08, 34.65it/s]

Ep 24700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 1


 99%|█████████▉| 24805/25000 [10:11<00:04, 44.05it/s]

Ep 24800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


100%|█████████▉| 24905/25000 [10:13<00:02, 44.69it/s]

Ep 24900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


100%|██████████| 25000/25000 [10:15<00:00, 40.60it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2

TEST:


 38%|███▊      | 114/300 [00:00<00:01, 132.28it/s]

Ep 100/300, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 71%|███████▏  | 214/300 [00:01<00:00, 138.53it/s]

Ep 200/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 6


100%|██████████| 300/300 [00:02<00:00, 136.15it/s]


Ep 300/300, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.5, First Action 2

GAMMA 0.8 - LR 0.1 - Entropy Decay True


  0%|          | 105/25000 [00:02<11:06, 37.36it/s]

Ep 100/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 17.5, First Action 1


  1%|          | 205/25000 [00:05<12:03, 34.26it/s]

Ep 200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


  1%|          | 305/25000 [00:08<09:27, 43.51it/s]

Ep 300/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


  2%|▏         | 405/25000 [00:10<09:08, 44.85it/s]

Ep 400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 2


  2%|▏         | 505/25000 [00:12<09:10, 44.51it/s]

Ep 500/25000, Opt. Action: 3, Reward: 12.400000005960464, Cumulative-Regret: 12.599999994039536, AVG100-Regret: 17.8, First Action 3


  2%|▏         | 605/25000 [00:14<09:11, 44.23it/s]

Ep 600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 2


  3%|▎         | 706/25000 [00:17<10:51, 37.28it/s]

Ep 700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


  3%|▎         | 806/25000 [00:20<10:30, 38.37it/s]

Ep 800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


  4%|▎         | 907/25000 [00:22<09:24, 42.66it/s]

Ep 900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


  4%|▍         | 1007/25000 [00:25<08:56, 44.69it/s]

Ep 1000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 4


  4%|▍         | 1107/25000 [00:27<09:15, 43.05it/s]

Ep 1100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


  5%|▍         | 1207/25000 [00:29<08:57, 44.28it/s]

Ep 1200/25000, Opt. Action: 3, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.4, First Action 3


  5%|▌         | 1307/25000 [00:32<08:56, 44.20it/s]

Ep 1300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


  6%|▌         | 1406/25000 [00:34<10:54, 36.05it/s]

Ep 1400/25000, Opt. Action: 4, Reward: 4.5, Cumulative-Regret: 20.5, AVG100-Regret: 18.1, First Action 3


  6%|▌         | 1508/25000 [00:37<10:10, 38.48it/s]

Ep 1500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


  6%|▋         | 1607/25000 [00:40<08:50, 44.08it/s]

Ep 1600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 9


  7%|▋         | 1707/25000 [00:42<08:40, 44.78it/s]

Ep 1700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


  7%|▋         | 1807/25000 [00:44<08:51, 43.62it/s]

Ep 1800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 4


  8%|▊         | 1907/25000 [00:46<08:34, 44.91it/s]

Ep 1900/25000, Opt. Action: 1, Reward: 3.4000000059604645, Cumulative-Regret: 21.599999994039536, AVG100-Regret: 18.3, First Action 4


  8%|▊         | 2006/25000 [00:49<10:05, 37.97it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 4


  8%|▊         | 2103/25000 [00:52<12:06, 31.50it/s]

Ep 2100/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 6


  9%|▉         | 2208/25000 [00:54<08:40, 43.83it/s]

Ep 2200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


  9%|▉         | 2308/25000 [00:57<08:31, 44.38it/s]

Ep 2300/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 9


 10%|▉         | 2408/25000 [00:59<08:47, 42.80it/s]

Ep 2400/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 1


 10%|█         | 2508/25000 [01:01<08:45, 42.80it/s]

Ep 2500/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 10%|█         | 2604/25000 [01:04<09:52, 37.83it/s]

Ep 2600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 11%|█         | 2706/25000 [01:07<10:38, 34.89it/s]

Ep 2700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 11%|█         | 2806/25000 [01:09<08:35, 43.07it/s]

Ep 2800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 12%|█▏        | 2906/25000 [01:11<08:19, 44.19it/s]

Ep 2900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 6


 12%|█▏        | 3006/25000 [01:14<08:15, 44.35it/s]

Ep 3000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 12%|█▏        | 3106/25000 [01:16<08:28, 43.10it/s]

Ep 3100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 13%|█▎        | 3207/25000 [01:19<10:00, 36.29it/s]

Ep 3200/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 13%|█▎        | 3304/25000 [01:21<10:27, 34.58it/s]

Ep 3300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 14%|█▎        | 3405/25000 [01:24<08:36, 41.79it/s]

Ep 3400/25000, Opt. Action: 4, Reward: 8.5, Cumulative-Regret: 16.5, AVG100-Regret: 18.4, First Action 6


 14%|█▍        | 3505/25000 [01:26<08:02, 44.52it/s]

Ep 3500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 4


 14%|█▍        | 3605/25000 [01:29<08:14, 43.27it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 8.899999976158142, Cumulative-Regret: 16.100000023841858, AVG100-Regret: 17.7, First Action 2


 15%|█▍        | 3705/25000 [01:31<08:10, 43.42it/s]

Ep 3700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 15%|█▌        | 3805/25000 [01:33<07:57, 44.39it/s]

Ep 3800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 16%|█▌        | 3905/25000 [01:36<10:27, 33.60it/s]

Ep 3900/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 18.1, First Action 10


 16%|█▌        | 4007/25000 [01:39<09:13, 37.91it/s]

Ep 4000/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 8


 16%|█▋        | 4105/25000 [01:41<08:09, 42.72it/s]

Ep 4100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 2


 17%|█▋        | 4205/25000 [01:43<07:54, 43.80it/s]

Ep 4200/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 17.5, First Action 10


 17%|█▋        | 4305/25000 [01:46<07:48, 44.20it/s]

Ep 4300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 18%|█▊        | 4405/25000 [01:48<07:54, 43.44it/s]

Ep 4400/25000, Opt. Action: 8, Reward: 8.899999976158142, Cumulative-Regret: 16.100000023841858, AVG100-Regret: 18.2, First Action 6


 18%|█▊        | 4504/25000 [01:51<09:25, 36.27it/s]

Ep 4500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 18%|█▊        | 4604/25000 [01:54<09:57, 34.14it/s]

Ep 4600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 19%|█▉        | 4706/25000 [01:56<07:43, 43.75it/s]

Ep 4700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 19%|█▉        | 4806/25000 [01:58<07:38, 44.04it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 20%|█▉        | 4906/25000 [02:01<07:32, 44.44it/s]

Ep 4900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 20%|██        | 5006/25000 [02:03<07:51, 42.38it/s]

Ep 5000/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


 20%|██        | 5105/25000 [02:06<09:12, 36.01it/s]

Ep 5100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 21%|██        | 5206/25000 [02:09<09:44, 33.85it/s]

Ep 5200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


 21%|██        | 5305/25000 [02:11<07:20, 44.72it/s]

Ep 5300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 1


 22%|██▏       | 5405/25000 [02:13<07:23, 44.21it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 22%|██▏       | 5505/25000 [02:15<07:22, 44.02it/s]

Ep 5500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 22%|██▏       | 5605/25000 [02:18<07:21, 43.95it/s]

Ep 5600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 23%|██▎       | 5707/25000 [02:20<08:42, 36.92it/s]

Ep 5700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 23%|██▎       | 5803/25000 [02:23<10:06, 31.64it/s]

Ep 5800/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 1


 24%|██▎       | 5907/25000 [02:26<07:10, 44.31it/s]

Ep 5900/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


 24%|██▍       | 6007/25000 [02:28<07:16, 43.53it/s]

Ep 6000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 24%|██▍       | 6107/25000 [02:30<07:08, 44.06it/s]

Ep 6100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 25%|██▍       | 6207/25000 [02:33<07:03, 44.39it/s]

Ep 6200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


 25%|██▌       | 6304/25000 [02:35<08:29, 36.71it/s]

Ep 6300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 26%|██▌       | 6405/25000 [02:38<08:45, 35.41it/s]

Ep 6400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 26%|██▌       | 6508/25000 [02:41<07:03, 43.71it/s]

Ep 6500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 26%|██▋       | 6608/25000 [02:43<06:52, 44.56it/s]

Ep 6600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 27%|██▋       | 6708/25000 [02:45<06:58, 43.75it/s]

Ep 6700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 2


 27%|██▋       | 6808/25000 [02:47<06:49, 44.40it/s]

Ep 6800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 7


 28%|██▊       | 6908/25000 [02:50<06:53, 43.75it/s]

Ep 6900/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 9


 28%|██▊       | 7007/25000 [02:53<08:42, 34.44it/s]

Ep 7000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 28%|██▊       | 7109/25000 [02:55<07:42, 38.65it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 29%|██▉       | 7208/25000 [02:58<06:45, 43.92it/s]

Ep 7200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 29%|██▉       | 7308/25000 [03:00<06:42, 43.93it/s]

Ep 7300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 30%|██▉       | 7408/25000 [03:02<06:40, 43.92it/s]

Ep 7400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 3


 30%|███       | 7508/25000 [03:05<06:41, 43.58it/s]

Ep 7500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 30%|███       | 7604/25000 [03:07<08:13, 35.27it/s]

Ep 7600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 31%|███       | 7704/25000 [03:10<08:55, 32.30it/s]

Ep 7700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 31%|███       | 7807/25000 [03:12<06:27, 44.40it/s]

Ep 7800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 32%|███▏      | 7907/25000 [03:15<06:22, 44.70it/s]

Ep 7900/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 2


 32%|███▏      | 8007/25000 [03:17<06:27, 43.83it/s]

Ep 8000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 32%|███▏      | 8107/25000 [03:19<06:19, 44.49it/s]

Ep 8100/25000, Opt. Action: 7, Reward: 4.800000011920929, Cumulative-Regret: 20.19999998807907, AVG100-Regret: 18.0, First Action 3


 33%|███▎      | 8206/25000 [03:22<07:55, 35.28it/s]

Ep 8200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 33%|███▎      | 8304/25000 [03:25<08:20, 33.33it/s]

Ep 8300/25000, Opt. Action: 2, Reward: 4.300000011920929, Cumulative-Regret: 20.69999998807907, AVG100-Regret: 17.7, First Action 9


 34%|███▎      | 8409/25000 [03:27<06:13, 44.37it/s]

Ep 8400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 34%|███▍      | 8509/25000 [03:30<06:08, 44.73it/s]

Ep 8500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 4


 34%|███▍      | 8604/25000 [03:32<06:09, 44.35it/s]

Ep 8600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 7


 35%|███▍      | 8704/25000 [03:34<06:17, 43.12it/s]

Ep 8700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 0


 35%|███▌      | 8805/25000 [03:36<07:44, 34.88it/s]

Ep 8800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 36%|███▌      | 8906/25000 [03:39<07:44, 34.67it/s]

Ep 8900/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 2


 36%|███▌      | 9004/25000 [03:42<06:04, 43.93it/s]

Ep 9000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 36%|███▋      | 9109/25000 [03:44<05:56, 44.57it/s]

Ep 9100/25000, Opt. Action: 2, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 2


 37%|███▋      | 9204/25000 [03:46<06:01, 43.72it/s]

Ep 9200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 37%|███▋      | 9309/25000 [03:49<05:49, 44.86it/s]

Ep 9300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 38%|███▊      | 9404/25000 [03:51<06:05, 42.68it/s]

Ep 9400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 38%|███▊      | 9503/25000 [03:54<07:13, 35.71it/s]

Ep 9500/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 5


 38%|███▊      | 9604/25000 [03:57<06:58, 36.76it/s]

Ep 9600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 39%|███▉      | 9707/25000 [03:59<06:04, 41.93it/s]

Ep 9700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 39%|███▉      | 9807/25000 [04:01<05:44, 44.15it/s]

Ep 9800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 40%|███▉      | 9907/25000 [04:04<05:38, 44.57it/s]

Ep 9900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 40%|████      | 10007/25000 [04:06<05:31, 45.26it/s]

Ep 10000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


 40%|████      | 10107/25000 [04:09<06:56, 35.76it/s]

Ep 10100/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.2, First Action 5


 41%|████      | 10203/25000 [04:11<07:04, 34.89it/s]

Ep 10200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 41%|████      | 10309/25000 [04:14<05:31, 44.35it/s]

Ep 10300/25000, Opt. Action: 6, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.3, First Action 6


 42%|████▏     | 10409/25000 [04:16<05:28, 44.45it/s]

Ep 10400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 42%|████▏     | 10509/25000 [04:19<05:27, 44.26it/s]

Ep 10500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 42%|████▏     | 10604/25000 [04:21<05:21, 44.76it/s]

Ep 10600/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 2


 43%|████▎     | 10707/25000 [04:23<06:23, 37.25it/s]

Ep 10700/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 43%|████▎     | 10804/25000 [04:26<07:25, 31.85it/s]

Ep 10800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 44%|████▎     | 10906/25000 [04:29<05:12, 45.10it/s]

Ep 10900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 44%|████▍     | 11006/25000 [04:31<05:12, 44.78it/s]

Ep 11000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 44%|████▍     | 11106/25000 [04:33<05:15, 44.05it/s]

Ep 11100/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.3, First Action 1


 45%|████▍     | 11206/25000 [04:36<05:12, 44.18it/s]

Ep 11200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 45%|████▌     | 11305/25000 [04:38<06:26, 35.40it/s]

Ep 11300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 46%|████▌     | 11405/25000 [04:41<06:47, 33.34it/s]

Ep 11400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 46%|████▌     | 11509/25000 [04:44<05:01, 44.69it/s]

Ep 11500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 46%|████▋     | 11604/25000 [04:46<05:01, 44.42it/s]

Ep 11600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.1, First Action 1


 47%|████▋     | 11704/25000 [04:48<05:05, 43.48it/s]

Ep 11700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 5


 47%|████▋     | 11804/25000 [04:50<05:02, 43.59it/s]

Ep 11800/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 4


 48%|████▊     | 11905/25000 [04:53<06:01, 36.25it/s]

Ep 11900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 48%|████▊     | 12005/25000 [04:56<06:00, 36.04it/s]

Ep 12000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 5


 48%|████▊     | 12108/25000 [04:58<05:06, 42.05it/s]

Ep 12100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 6


 49%|████▉     | 12208/25000 [05:01<04:48, 44.35it/s]

Ep 12200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 3


 49%|████▉     | 12308/25000 [05:03<04:47, 44.19it/s]

Ep 12300/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 7


 50%|████▉     | 12408/25000 [05:05<04:46, 43.97it/s]

Ep 12400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 50%|█████     | 12508/25000 [05:08<04:40, 44.59it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 50%|█████     | 12606/25000 [05:10<05:47, 35.64it/s]

Ep 12600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 51%|█████     | 12709/25000 [05:13<04:59, 41.00it/s]

Ep 12700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 51%|█████     | 12809/25000 [05:16<04:31, 44.89it/s]

Ep 12800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 52%|█████▏    | 12909/25000 [05:18<04:31, 44.49it/s]

Ep 12900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 52%|█████▏    | 13004/25000 [05:20<04:38, 43.12it/s]

Ep 13000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


 52%|█████▏    | 13104/25000 [05:22<04:34, 43.32it/s]

Ep 13100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 53%|█████▎    | 13205/25000 [05:25<05:50, 33.63it/s]

Ep 13200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 9


 53%|█████▎    | 13305/25000 [05:28<05:34, 34.97it/s]

Ep 13300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 54%|█████▎    | 13408/25000 [05:30<04:19, 44.61it/s]

Ep 13400/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 54%|█████▍    | 13508/25000 [05:33<04:17, 44.71it/s]

Ep 13500/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 1


 54%|█████▍    | 13608/25000 [05:35<04:18, 44.04it/s]

Ep 13600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 55%|█████▍    | 13708/25000 [05:37<04:18, 43.70it/s]

Ep 13700/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 0


 55%|█████▌    | 13807/25000 [05:40<04:56, 37.77it/s]

Ep 13800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 56%|█████▌    | 13903/25000 [05:42<05:25, 34.06it/s]

Ep 13900/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 56%|█████▌    | 14008/25000 [05:45<04:11, 43.67it/s]

Ep 14000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 56%|█████▋    | 14108/25000 [05:47<04:09, 43.70it/s]

Ep 14100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 57%|█████▋    | 14208/25000 [05:50<03:59, 45.04it/s]

Ep 14200/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 5


 57%|█████▋    | 14308/25000 [05:52<04:05, 43.61it/s]

Ep 14300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 58%|█████▊    | 14404/25000 [05:54<04:50, 36.42it/s]

Ep 14400/25000, Opt. Action: 5, Reward: 4.600000023841858, Cumulative-Regret: 20.399999976158142, AVG100-Regret: 17.8, First Action 0


 58%|█████▊    | 14504/25000 [05:57<04:47, 36.48it/s]

Ep 14500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 3


 58%|█████▊    | 14607/25000 [06:00<04:03, 42.76it/s]

Ep 14600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 59%|█████▉    | 14707/25000 [06:02<03:54, 43.91it/s]

Ep 14700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 59%|█████▉    | 14807/25000 [06:04<03:50, 44.13it/s]

Ep 14800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 60%|█████▉    | 14907/25000 [06:07<03:44, 44.96it/s]

Ep 14900/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 0


 60%|██████    | 15007/25000 [06:09<03:50, 43.44it/s]

Ep 15000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 3


 60%|██████    | 15104/25000 [06:12<04:24, 37.37it/s]

Ep 15100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


 61%|██████    | 15206/25000 [06:15<04:07, 39.61it/s]

Ep 15200/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 3


 61%|██████    | 15308/25000 [06:17<03:48, 42.46it/s]

Ep 15300/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 3


 62%|██████▏   | 15408/25000 [06:19<03:37, 44.14it/s]

Ep 15400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 62%|██████▏   | 15508/25000 [06:22<03:33, 44.45it/s]

Ep 15500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 5


 62%|██████▏   | 15608/25000 [06:24<03:36, 43.35it/s]

Ep 15600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 9


 63%|██████▎   | 15706/25000 [06:27<04:29, 34.49it/s]

Ep 15700/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 63%|██████▎   | 15803/25000 [06:29<04:45, 32.25it/s]

Ep 15800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 4


 64%|██████▎   | 15908/25000 [06:32<03:28, 43.66it/s]

Ep 15900/25000, Opt. Action: 4, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 2


 64%|██████▍   | 16008/25000 [06:34<03:24, 43.98it/s]

Ep 16000/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.7, First Action 2


 64%|██████▍   | 16108/25000 [06:36<03:26, 43.10it/s]

Ep 16100/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 65%|██████▍   | 16208/25000 [06:39<03:18, 44.40it/s]

Ep 16200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 65%|██████▌   | 16304/25000 [06:41<04:09, 34.85it/s]

Ep 16300/25000, Opt. Action: 0, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 0


 66%|██████▌   | 16404/25000 [06:44<04:02, 35.41it/s]

Ep 16400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 9


 66%|██████▌   | 16506/25000 [06:47<03:09, 44.81it/s]

Ep 16500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 66%|██████▋   | 16606/25000 [06:49<03:13, 43.37it/s]

Ep 16600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 2


 67%|██████▋   | 16706/25000 [06:51<03:11, 43.29it/s]

Ep 16700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 67%|██████▋   | 16806/25000 [06:54<03:06, 44.05it/s]

Ep 16800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 68%|██████▊   | 16905/25000 [06:56<03:48, 35.38it/s]

Ep 16900/25000, Opt. Action: 5, Reward: 8.600000023841858, Cumulative-Regret: 16.399999976158142, AVG100-Regret: 18.1, First Action 3


 68%|██████▊   | 17005/25000 [06:59<04:01, 33.09it/s]

Ep 17000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 68%|██████▊   | 17109/25000 [07:02<03:00, 43.72it/s]

Ep 17100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 69%|██████▉   | 17209/25000 [07:04<02:55, 44.33it/s]

Ep 17200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 69%|██████▉   | 17309/25000 [07:06<02:52, 44.51it/s]

Ep 17300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 5


 70%|██████▉   | 17409/25000 [07:09<02:52, 43.97it/s]

Ep 17400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 5


 70%|███████   | 17504/25000 [07:11<03:30, 35.56it/s]

Ep 17500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 70%|███████   | 17606/25000 [07:14<03:37, 33.97it/s]

Ep 17600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 6


 71%|███████   | 17705/25000 [07:16<02:51, 42.63it/s]

Ep 17700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 71%|███████   | 17805/25000 [07:19<02:43, 43.89it/s]

Ep 17800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 6


 72%|███████▏  | 17905/25000 [07:21<02:42, 43.59it/s]

Ep 17900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 72%|███████▏  | 18005/25000 [07:23<02:35, 45.08it/s]

Ep 18000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 6


 72%|███████▏  | 18105/25000 [07:26<02:39, 43.34it/s]

Ep 18100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 73%|███████▎  | 18207/25000 [07:28<03:12, 35.35it/s]

Ep 18200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 3


 73%|███████▎  | 18307/25000 [07:31<02:44, 40.71it/s]

Ep 18300/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.5, First Action 4


 74%|███████▎  | 18407/25000 [07:34<02:29, 43.96it/s]

Ep 18400/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 3


 74%|███████▍  | 18507/25000 [07:36<02:24, 44.81it/s]

Ep 18500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.3, First Action 1


 74%|███████▍  | 18607/25000 [07:38<02:27, 43.48it/s]

Ep 18600/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.8, First Action 8


 75%|███████▍  | 18707/25000 [07:40<02:22, 44.06it/s]

Ep 18700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 0


 75%|███████▌  | 18804/25000 [07:43<03:08, 32.84it/s]

Ep 18800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 7


 76%|███████▌  | 18904/25000 [07:46<02:47, 36.40it/s]

Ep 18900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 6


 76%|███████▌  | 19005/25000 [07:48<02:14, 44.49it/s]

Ep 19000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 76%|███████▋  | 19105/25000 [07:51<02:15, 43.51it/s]

Ep 19100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 77%|███████▋  | 19205/25000 [07:53<02:16, 42.60it/s]

Ep 19200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 2


 77%|███████▋  | 19305/25000 [07:55<02:06, 44.89it/s]

Ep 19300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 78%|███████▊  | 19404/25000 [07:58<02:34, 36.22it/s]

Ep 19400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 7


 78%|███████▊  | 19504/25000 [08:01<02:42, 33.88it/s]

Ep 19500/25000, Opt. Action: 6, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 8


 78%|███████▊  | 19606/25000 [08:03<02:04, 43.33it/s]

Ep 19600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 79%|███████▉  | 19706/25000 [08:06<02:00, 44.01it/s]

Ep 19700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 79%|███████▉  | 19806/25000 [08:08<01:56, 44.49it/s]

Ep 19800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 3


 80%|███████▉  | 19906/25000 [08:10<01:55, 44.09it/s]

Ep 19900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 80%|████████  | 20005/25000 [08:13<02:22, 35.07it/s]

Ep 20000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 3


 80%|████████  | 20106/25000 [08:15<02:28, 33.02it/s]

Ep 20100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 81%|████████  | 20207/25000 [08:18<01:55, 41.51it/s]

Ep 20200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 81%|████████  | 20307/25000 [08:20<01:47, 43.65it/s]

Ep 20300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 82%|████████▏ | 20407/25000 [08:23<01:42, 44.64it/s]

Ep 20400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 82%|████████▏ | 20507/25000 [08:25<01:42, 43.76it/s]

Ep 20500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 2


 82%|████████▏ | 20606/25000 [08:27<02:00, 36.37it/s]

Ep 20600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 2


 83%|████████▎ | 20703/25000 [08:30<01:59, 35.91it/s]

Ep 20700/25000, Opt. Action: 7, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.2, First Action 7


 83%|████████▎ | 20808/25000 [08:33<01:41, 41.21it/s]

Ep 20800/25000, Opt. Action: 3, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.3, First Action 3


 84%|████████▎ | 20908/25000 [08:35<01:32, 44.25it/s]

Ep 20900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 84%|████████▍ | 21008/25000 [08:38<01:31, 43.84it/s]

Ep 21000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 84%|████████▍ | 21108/25000 [08:40<01:29, 43.68it/s]

Ep 21100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 85%|████████▍ | 21208/25000 [08:42<01:27, 43.32it/s]

Ep 21200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 85%|████████▌ | 21307/25000 [08:45<01:41, 36.43it/s]

Ep 21300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 0


 86%|████████▌ | 21404/25000 [08:48<01:40, 35.76it/s]

Ep 21400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 86%|████████▌ | 21505/25000 [08:50<01:19, 43.92it/s]

Ep 21500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 86%|████████▋ | 21605/25000 [08:52<01:18, 43.09it/s]

Ep 21600/25000, Opt. Action: 6, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 18.0, First Action 8


 87%|████████▋ | 21705/25000 [08:55<01:14, 44.44it/s]

Ep 21700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 4


 87%|████████▋ | 21805/25000 [08:57<01:14, 42.94it/s]

Ep 21800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 3


 88%|████████▊ | 21907/25000 [09:00<01:24, 36.49it/s]

Ep 21900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 88%|████████▊ | 22004/25000 [09:03<01:30, 33.24it/s]

Ep 22000/25000, Opt. Action: 5, Reward: 21.0, Cumulative-Regret: 4.0, AVG100-Regret: 17.7, First Action 5


 88%|████████▊ | 22106/25000 [09:05<01:08, 42.40it/s]

Ep 22100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 89%|████████▉ | 22206/25000 [09:07<01:03, 43.87it/s]

Ep 22200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 4


 89%|████████▉ | 22306/25000 [09:10<01:00, 44.62it/s]

Ep 22300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 4


 90%|████████▉ | 22406/25000 [09:12<00:58, 44.14it/s]

Ep 22400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 90%|█████████ | 22504/25000 [09:14<01:10, 35.42it/s]

Ep 22500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.5, First Action 5


 90%|█████████ | 22605/25000 [09:17<01:09, 34.70it/s]

Ep 22600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 6


 91%|█████████ | 22704/25000 [09:20<00:53, 42.62it/s]

Ep 22700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 5


 91%|█████████ | 22809/25000 [09:22<00:49, 43.82it/s]

Ep 22800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 92%|█████████▏| 22909/25000 [09:25<00:46, 44.83it/s]

Ep 22900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 92%|█████████▏| 23004/25000 [09:27<00:44, 44.52it/s]

Ep 23000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 92%|█████████▏| 23104/25000 [09:29<00:45, 42.03it/s]

Ep 23100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 93%|█████████▎| 23205/25000 [09:32<00:47, 37.82it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 93%|█████████▎| 23307/25000 [09:35<00:47, 35.56it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 8


 94%|█████████▎| 23406/25000 [09:37<00:36, 43.15it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 8


 94%|█████████▍| 23506/25000 [09:39<00:33, 43.97it/s]

Ep 23500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 94%|█████████▍| 23606/25000 [09:42<00:31, 43.72it/s]

Ep 23600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 95%|█████████▍| 23706/25000 [09:44<00:28, 44.92it/s]

Ep 23700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 95%|█████████▌| 23808/25000 [09:47<00:30, 38.87it/s]

Ep 23800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 96%|█████████▌| 23905/25000 [09:49<00:34, 32.07it/s]

Ep 23900/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.7, First Action 8


 96%|█████████▌| 24005/25000 [09:52<00:22, 43.48it/s]

Ep 24000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 96%|█████████▋| 24105/25000 [09:54<00:20, 43.04it/s]

Ep 24100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 9


 97%|█████████▋| 24205/25000 [09:57<00:18, 43.56it/s]

Ep 24200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 97%|█████████▋| 24305/25000 [09:59<00:15, 43.91it/s]

Ep 24300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 98%|█████████▊| 24404/25000 [10:01<00:15, 37.30it/s]

Ep 24400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 98%|█████████▊| 24504/25000 [10:04<00:14, 33.47it/s]

Ep 24500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 98%|█████████▊| 24604/25000 [10:07<00:08, 44.42it/s]

Ep 24600/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 8


 99%|█████████▉| 24704/25000 [10:09<00:06, 42.31it/s]

Ep 24700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


 99%|█████████▉| 24804/25000 [10:11<00:04, 42.49it/s]

Ep 24800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


100%|█████████▉| 24909/25000 [10:14<00:02, 44.27it/s]

Ep 24900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8


100%|██████████| 25000/25000 [10:16<00:00, 40.54it/s]


Ep 25000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8

TEST:


 38%|███▊      | 113/300 [00:01<00:01, 102.13it/s]

Ep 100/300, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 71%|███████   | 213/300 [00:02<00:00, 98.66it/s] 

Ep 200/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


100%|██████████| 300/300 [00:02<00:00, 101.35it/s]


Ep 300/300, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8

GAMMA 0.9 - LR 0.0001 - Entropy Decay False


  0%|          | 108/25000 [00:02<09:36, 43.20it/s]

Ep 100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


  1%|          | 208/25000 [00:05<09:39, 42.80it/s]

Ep 200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


  1%|          | 308/25000 [00:07<09:30, 43.28it/s]

Ep 300/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


  2%|▏         | 408/25000 [00:09<09:14, 44.32it/s]

Ep 400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


  2%|▏         | 503/25000 [00:12<11:28, 35.59it/s]

Ep 500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 4


  2%|▏         | 606/25000 [00:14<12:02, 33.76it/s]

Ep 600/25000, Opt. Action: 3, Reward: 4.4000000059604645, Cumulative-Regret: 20.599999994039536, AVG100-Regret: 18.2, First Action 2


  3%|▎         | 709/25000 [00:17<09:26, 42.89it/s]

Ep 700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 3


  3%|▎         | 809/25000 [00:20<09:16, 43.44it/s]

Ep 800/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 4


  4%|▎         | 904/25000 [00:22<08:58, 44.78it/s]

Ep 900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


  4%|▍         | 1009/25000 [00:24<09:00, 44.43it/s]

Ep 1000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 7


  4%|▍         | 1106/25000 [00:26<10:45, 37.01it/s]

Ep 1100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  5%|▍         | 1204/25000 [00:29<10:26, 38.00it/s]

Ep 1200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 6


  5%|▌         | 1305/25000 [00:32<09:20, 42.26it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


  6%|▌         | 1405/25000 [00:34<08:55, 44.07it/s]

Ep 1400/25000, Opt. Action: 4, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 7


  6%|▌         | 1505/25000 [00:37<08:47, 44.52it/s]

Ep 1500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 10


  6%|▋         | 1605/25000 [00:39<09:12, 42.32it/s]

Ep 1600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 4


  7%|▋         | 1705/25000 [00:41<09:23, 41.34it/s]

Ep 1700/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 6


  7%|▋         | 1807/25000 [00:44<10:11, 37.90it/s]

Ep 1800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


  8%|▊         | 1905/25000 [00:47<09:18, 41.34it/s]

Ep 1900/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 9


  8%|▊         | 2005/25000 [00:49<08:34, 44.70it/s]

Ep 2000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


  8%|▊         | 2105/25000 [00:51<08:36, 44.31it/s]

Ep 2100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 4


  9%|▉         | 2205/25000 [00:54<08:30, 44.62it/s]

Ep 2200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 6


  9%|▉         | 2305/25000 [00:56<08:37, 43.89it/s]

Ep 2300/25000, Opt. Action: 4, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 8


 10%|▉         | 2403/25000 [00:59<11:01, 34.15it/s]

Ep 2400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 7


 10%|█         | 2503/25000 [01:02<11:32, 32.48it/s]

Ep 2500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 10%|█         | 2606/25000 [01:04<08:39, 43.09it/s]

Ep 2600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 11%|█         | 2706/25000 [01:06<08:28, 43.84it/s]

Ep 2700/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 7


 11%|█         | 2806/25000 [01:09<08:44, 42.33it/s]

Ep 2800/25000, Opt. Action: 2, Reward: 8.300000011920929, Cumulative-Regret: 16.69999998807907, AVG100-Regret: 17.6, First Action 10


 12%|█▏        | 2906/25000 [01:11<08:19, 44.25it/s]

Ep 2900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 12%|█▏        | 3005/25000 [01:14<10:11, 35.99it/s]

Ep 3000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 12%|█▏        | 3105/25000 [01:17<11:43, 31.12it/s]

Ep 3100/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 13%|█▎        | 3208/25000 [01:19<08:30, 42.67it/s]

Ep 3200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 13%|█▎        | 3308/25000 [01:21<08:13, 43.99it/s]

Ep 3300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 7


 14%|█▎        | 3408/25000 [01:24<08:21, 43.09it/s]

Ep 3400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 0


 14%|█▍        | 3508/25000 [01:26<08:17, 43.22it/s]

Ep 3500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 2


 14%|█▍        | 3607/25000 [01:29<09:33, 37.32it/s]

Ep 3600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 9


 15%|█▍        | 3703/25000 [01:32<10:30, 33.78it/s]

Ep 3700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 15%|█▌        | 3805/25000 [01:34<08:03, 43.88it/s]

Ep 3800/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 16%|█▌        | 3905/25000 [01:36<07:57, 44.17it/s]

Ep 3900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 16%|█▌        | 4005/25000 [01:39<08:23, 41.72it/s]

Ep 4000/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 0


 16%|█▋        | 4105/25000 [01:41<08:10, 42.58it/s]

Ep 4100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 4


 17%|█▋        | 4207/25000 [01:44<09:21, 37.01it/s]

Ep 4200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 17%|█▋        | 4303/25000 [01:46<10:37, 32.49it/s]

Ep 4300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


 18%|█▊        | 4406/25000 [01:49<07:51, 43.72it/s]

Ep 4400/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 18%|█▊        | 4506/25000 [01:52<08:04, 42.31it/s]

Ep 4500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 18%|█▊        | 4606/25000 [01:54<07:52, 43.20it/s]

Ep 4600/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 5


 19%|█▉        | 4706/25000 [01:56<07:45, 43.64it/s]

Ep 4700/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 19%|█▉        | 4806/25000 [01:59<09:46, 34.43it/s]

Ep 4800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 20%|█▉        | 4906/25000 [02:02<09:53, 33.88it/s]

Ep 4900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 6


 20%|██        | 5005/25000 [02:04<07:42, 43.26it/s]

Ep 5000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 20%|██        | 5105/25000 [02:07<07:46, 42.62it/s]

Ep 5100/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 6


 21%|██        | 5205/25000 [02:09<07:30, 43.96it/s]

Ep 5200/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 4


 21%|██        | 5305/25000 [02:11<07:50, 41.88it/s]

Ep 5300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 22%|██▏       | 5405/25000 [02:13<07:35, 42.97it/s]

Ep 5400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 6


 22%|██▏       | 5506/25000 [02:16<09:01, 35.98it/s]

Ep 5500/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.5, First Action 6


 22%|██▏       | 5606/25000 [02:19<07:56, 40.70it/s]

Ep 5600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 7


 23%|██▎       | 5706/25000 [02:21<07:19, 43.93it/s]

Ep 5700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 7


 23%|██▎       | 5806/25000 [02:24<07:26, 42.99it/s]

Ep 5800/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 24%|██▎       | 5906/25000 [02:26<07:37, 41.75it/s]

Ep 5900/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 9


 24%|██▍       | 6006/25000 [02:28<07:10, 44.10it/s]

Ep 6000/25000, Opt. Action: 6, Reward: 4.699999988079071, Cumulative-Regret: 20.30000001192093, AVG100-Regret: 18.2, First Action 7


 24%|██▍       | 6103/25000 [02:31<08:53, 35.41it/s]

Ep 6100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 3


 25%|██▍       | 6206/25000 [02:34<07:47, 40.24it/s]

Ep 6200/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 25%|██▌       | 6306/25000 [02:36<07:10, 43.42it/s]

Ep 6300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 26%|██▌       | 6406/25000 [02:39<06:56, 44.62it/s]

Ep 6400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 5


 26%|██▌       | 6506/25000 [02:41<07:00, 43.95it/s]

Ep 6500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 2


 26%|██▋       | 6606/25000 [02:43<06:50, 44.77it/s]

Ep 6600/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 27%|██▋       | 6704/25000 [02:46<08:40, 35.14it/s]

Ep 6700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 5


 27%|██▋       | 6804/25000 [02:49<09:02, 33.52it/s]

Ep 6800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 8


 28%|██▊       | 6908/25000 [02:51<06:52, 43.82it/s]

Ep 6900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 28%|██▊       | 7008/25000 [02:54<06:42, 44.72it/s]

Ep 7000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 28%|██▊       | 7108/25000 [02:56<06:44, 44.23it/s]

Ep 7100/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 5


 29%|██▉       | 7208/25000 [02:58<06:49, 43.48it/s]

Ep 7200/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.2, First Action 5


 29%|██▉       | 7306/25000 [03:01<07:58, 36.98it/s]

Ep 7300/25000, Opt. Action: 5, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.8, First Action 4


 30%|██▉       | 7405/25000 [03:03<08:44, 33.55it/s]

Ep 7400/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 30%|███       | 7509/25000 [03:06<06:32, 44.56it/s]

Ep 7500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 30%|███       | 7609/25000 [03:08<06:30, 44.55it/s]

Ep 7600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 31%|███       | 7709/25000 [03:11<06:23, 45.10it/s]

Ep 7700/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.7, First Action 0


 31%|███       | 7804/25000 [03:13<06:41, 42.86it/s]

Ep 7800/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 2


 32%|███▏      | 7906/25000 [03:15<07:35, 37.54it/s]

Ep 7900/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 32%|███▏      | 8007/25000 [03:18<07:34, 37.42it/s]

Ep 8000/25000, Opt. Action: 5, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 5


 32%|███▏      | 8108/25000 [03:21<06:34, 42.79it/s]

Ep 8100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 33%|███▎      | 8208/25000 [03:23<06:09, 45.41it/s]

Ep 8200/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 33%|███▎      | 8308/25000 [03:25<06:16, 44.28it/s]

Ep 8300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 34%|███▎      | 8408/25000 [03:27<06:23, 43.32it/s]

Ep 8400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 6


 34%|███▍      | 8508/25000 [03:30<06:15, 43.96it/s]

Ep 8500/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 5


 34%|███▍      | 8605/25000 [03:32<07:12, 37.86it/s]

Ep 8600/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 35%|███▍      | 8707/25000 [03:35<07:47, 34.88it/s]

Ep 8700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 7


 35%|███▌      | 8805/25000 [03:37<06:01, 44.82it/s]

Ep 8800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 4


 36%|███▌      | 8905/25000 [03:40<06:01, 44.56it/s]

Ep 8900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 36%|███▌      | 9005/25000 [03:42<06:22, 41.85it/s]

Ep 9000/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 8


 36%|███▋      | 9105/25000 [03:44<05:57, 44.49it/s]

Ep 9100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 0


 37%|███▋      | 9206/25000 [03:47<06:53, 38.19it/s]

Ep 9200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 5


 37%|███▋      | 9306/25000 [03:50<07:57, 32.87it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 38%|███▊      | 9408/25000 [03:52<05:49, 44.67it/s]

Ep 9400/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 5


 38%|███▊      | 9508/25000 [03:55<05:46, 44.74it/s]

Ep 9500/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 38%|███▊      | 9608/25000 [03:57<05:54, 43.40it/s]

Ep 9600/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 3


 39%|███▉      | 9708/25000 [03:59<05:46, 44.12it/s]

Ep 9700/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 39%|███▉      | 9804/25000 [04:01<06:54, 36.65it/s]

Ep 9800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 40%|███▉      | 9905/25000 [04:04<06:49, 36.89it/s]

Ep 9900/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.3, First Action 1


 40%|████      | 10007/25000 [04:07<05:47, 43.17it/s]

Ep 10000/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 18.0, First Action 1


 40%|████      | 10107/25000 [04:09<05:36, 44.21it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 41%|████      | 10207/25000 [04:11<05:38, 43.76it/s]

Ep 10200/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 41%|████      | 10307/25000 [04:14<05:31, 44.27it/s]

Ep 10300/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 42%|████▏     | 10407/25000 [04:16<05:28, 44.46it/s]

Ep 10400/25000, Opt. Action: 8, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.4, First Action 8


 42%|████▏     | 10506/25000 [04:19<07:17, 33.14it/s]

Ep 10500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 42%|████▏     | 10605/25000 [04:22<06:00, 39.94it/s]

Ep 10600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 8


 43%|████▎     | 10705/25000 [04:24<05:30, 43.22it/s]

Ep 10700/25000, Opt. Action: 3, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.8, First Action 3


 43%|████▎     | 10805/25000 [04:26<05:24, 43.79it/s]

Ep 10800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 8


 44%|████▎     | 10905/25000 [04:28<05:15, 44.61it/s]

Ep 10900/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 8


 44%|████▍     | 11005/25000 [04:31<05:14, 44.45it/s]

Ep 11000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 44%|████▍     | 11105/25000 [04:33<06:14, 37.07it/s]

Ep 11100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 45%|████▍     | 11205/25000 [04:36<06:47, 33.81it/s]

Ep 11200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 45%|████▌     | 11306/25000 [04:39<05:12, 43.78it/s]

Ep 11300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 0


 46%|████▌     | 11406/25000 [04:41<05:08, 44.13it/s]

Ep 11400/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.6, First Action 6


 46%|████▌     | 11506/25000 [04:43<05:03, 44.44it/s]

Ep 11500/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 46%|████▋     | 11606/25000 [04:46<04:55, 45.34it/s]

Ep 11600/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 1


 47%|████▋     | 11707/25000 [04:48<05:54, 37.48it/s]

Ep 11700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 47%|████▋     | 11803/25000 [04:51<06:30, 33.77it/s]

Ep 11800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 8


 48%|████▊     | 11907/25000 [04:53<05:00, 43.62it/s]

Ep 11900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 8


 48%|████▊     | 12007/25000 [04:56<04:51, 44.61it/s]

Ep 12000/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 48%|████▊     | 12107/25000 [04:58<04:50, 44.32it/s]

Ep 12100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 49%|████▉     | 12207/25000 [05:00<04:48, 44.38it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 4


 49%|████▉     | 12307/25000 [05:03<05:01, 42.04it/s]

Ep 12300/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 0


 50%|████▉     | 12405/25000 [05:05<05:37, 37.32it/s]

Ep 12400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.5, First Action 1


 50%|█████     | 12508/25000 [05:08<05:00, 41.56it/s]

Ep 12500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 50%|█████     | 12608/25000 [05:10<04:35, 45.02it/s]

Ep 12600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 51%|█████     | 12708/25000 [05:13<04:34, 44.76it/s]

Ep 12700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 51%|█████     | 12808/25000 [05:15<04:36, 44.15it/s]

Ep 12800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 52%|█████▏    | 12908/25000 [05:17<04:35, 43.95it/s]

Ep 12900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 52%|█████▏    | 13006/25000 [05:20<05:11, 38.47it/s]

Ep 13000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 52%|█████▏    | 13103/25000 [05:23<05:53, 33.62it/s]

Ep 13100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 0


 53%|█████▎    | 13206/25000 [05:25<04:30, 43.54it/s]

Ep 13200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 3


 53%|█████▎    | 13306/25000 [05:27<04:19, 45.01it/s]

Ep 13300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 54%|█████▎    | 13406/25000 [05:29<04:19, 44.72it/s]

Ep 13400/25000, Opt. Action: 2, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 2


 54%|█████▍    | 13506/25000 [05:32<04:24, 43.43it/s]

Ep 13500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 54%|█████▍    | 13604/25000 [05:34<05:27, 34.76it/s]

Ep 13600/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 1


 55%|█████▍    | 13704/25000 [05:37<05:34, 33.74it/s]

Ep 13700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 4


 55%|█████▌    | 13805/25000 [05:40<04:11, 44.43it/s]

Ep 13800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 9


 56%|█████▌    | 13905/25000 [05:42<04:11, 44.13it/s]

Ep 13900/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 7


 56%|█████▌    | 14005/25000 [05:44<04:08, 44.27it/s]

Ep 14000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 56%|█████▋    | 14105/25000 [05:47<04:04, 44.48it/s]

Ep 14100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 57%|█████▋    | 14206/25000 [05:49<04:58, 36.11it/s]

Ep 14200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 7


 57%|█████▋    | 14307/25000 [05:52<04:35, 38.85it/s]

Ep 14300/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 0


 58%|█████▊    | 14408/25000 [05:54<04:05, 43.17it/s]

Ep 14400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 58%|█████▊    | 14508/25000 [05:57<03:51, 45.33it/s]

Ep 14500/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 9


 58%|█████▊    | 14608/25000 [05:59<03:52, 44.78it/s]

Ep 14600/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.6, First Action 9


 59%|█████▉    | 14708/25000 [06:01<03:59, 43.03it/s]

Ep 14700/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 4


 59%|█████▉    | 14808/25000 [06:04<03:47, 44.84it/s]

Ep 14800/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 1


 60%|█████▉    | 14904/25000 [06:06<04:42, 35.68it/s]

Ep 14900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 7


 60%|██████    | 15008/25000 [06:09<04:20, 38.30it/s]

Ep 15000/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.6, First Action 1


 60%|██████    | 15107/25000 [06:11<03:38, 45.26it/s]

Ep 15100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 61%|██████    | 15207/25000 [06:14<03:47, 42.96it/s]

Ep 15200/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.3, First Action 0


 61%|██████    | 15307/25000 [06:16<03:39, 44.26it/s]

Ep 15300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 62%|██████▏   | 15407/25000 [06:18<03:34, 44.69it/s]

Ep 15400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 4


 62%|██████▏   | 15504/25000 [06:21<04:05, 38.62it/s]

Ep 15500/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


 62%|██████▏   | 15604/25000 [06:24<04:40, 33.45it/s]

Ep 15600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 0


 63%|██████▎   | 15706/25000 [06:26<03:30, 44.18it/s]

Ep 15700/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.7, First Action 5


 63%|██████▎   | 15806/25000 [06:28<03:27, 44.27it/s]

Ep 15800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 64%|██████▎   | 15906/25000 [06:31<03:25, 44.24it/s]

Ep 15900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 64%|██████▍   | 16006/25000 [06:33<03:23, 44.09it/s]

Ep 16000/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 9


 64%|██████▍   | 16105/25000 [06:35<04:09, 35.59it/s]

Ep 16100/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 65%|██████▍   | 16205/25000 [06:38<04:20, 33.77it/s]

Ep 16200/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 9


 65%|██████▌   | 16304/25000 [06:41<03:20, 43.38it/s]

Ep 16300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 66%|██████▌   | 16409/25000 [06:43<03:12, 44.74it/s]

Ep 16400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 2


 66%|██████▌   | 16509/25000 [06:45<03:09, 44.92it/s]

Ep 16500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 1


 66%|██████▋   | 16609/25000 [06:48<03:09, 44.24it/s]

Ep 16600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 67%|██████▋   | 16707/25000 [06:50<03:51, 35.77it/s]

Ep 16700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 67%|██████▋   | 16804/25000 [06:53<03:53, 35.13it/s]

Ep 16800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 9


 68%|██████▊   | 16907/25000 [06:56<03:08, 42.95it/s]

Ep 16900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 68%|██████▊   | 17007/25000 [06:58<03:02, 43.68it/s]

Ep 17000/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 68%|██████▊   | 17107/25000 [07:00<02:58, 44.31it/s]

Ep 17100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 8


 69%|██████▉   | 17207/25000 [07:02<02:54, 44.71it/s]

Ep 17200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 4


 69%|██████▉   | 17307/25000 [07:05<03:00, 42.70it/s]

Ep 17300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 70%|██████▉   | 17404/25000 [07:07<03:28, 36.41it/s]

Ep 17400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 0


 70%|███████   | 17506/25000 [07:10<03:36, 34.65it/s]

Ep 17500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 5


 70%|███████   | 17605/25000 [07:13<02:44, 44.95it/s]

Ep 17600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 71%|███████   | 17705/25000 [07:15<02:46, 43.85it/s]

Ep 17700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.7, First Action 7


 71%|███████   | 17805/25000 [07:17<02:42, 44.23it/s]

Ep 17800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 72%|███████▏  | 17905/25000 [07:19<02:41, 43.93it/s]

Ep 17900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 3


 72%|███████▏  | 18007/25000 [07:22<03:02, 38.40it/s]

Ep 18000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 7


 72%|███████▏  | 18107/25000 [07:25<03:19, 34.51it/s]

Ep 18100/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.0, First Action 8


 73%|███████▎  | 18205/25000 [07:27<02:36, 43.32it/s]

Ep 18200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 73%|███████▎  | 18305/25000 [07:30<02:30, 44.45it/s]

Ep 18300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 74%|███████▎  | 18405/25000 [07:32<02:32, 43.15it/s]

Ep 18400/25000, Opt. Action: 7, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 74%|███████▍  | 18505/25000 [07:34<02:30, 43.27it/s]

Ep 18500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 74%|███████▍  | 18605/25000 [07:37<02:48, 37.89it/s]

Ep 18600/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.7, First Action 1


 75%|███████▍  | 18706/25000 [07:39<03:06, 33.77it/s]

Ep 18700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 75%|███████▌  | 18809/25000 [07:42<02:18, 44.79it/s]

Ep 18800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 1


 76%|███████▌  | 18904/25000 [07:44<02:22, 42.64it/s]

Ep 18900/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.4, First Action 1


 76%|███████▌  | 19009/25000 [07:47<02:16, 43.97it/s]

Ep 19000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 76%|███████▋  | 19104/25000 [07:49<02:14, 43.91it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 77%|███████▋  | 19204/25000 [07:51<02:28, 39.04it/s]

Ep 19200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 2


 77%|███████▋  | 19304/25000 [07:54<02:38, 36.04it/s]

Ep 19300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 5


 78%|███████▊  | 19408/25000 [07:57<02:15, 41.31it/s]

Ep 19400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 1


 78%|███████▊  | 19508/25000 [07:59<02:05, 43.72it/s]

Ep 19500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 78%|███████▊  | 19608/25000 [08:01<02:02, 44.08it/s]

Ep 19600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 79%|███████▉  | 19708/25000 [08:04<01:59, 44.41it/s]

Ep 19700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 9


 79%|███████▉  | 19808/25000 [08:06<01:56, 44.42it/s]

Ep 19800/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 80%|███████▉  | 19907/25000 [08:09<02:16, 37.33it/s]

Ep 19900/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 3


 80%|████████  | 20003/25000 [08:11<02:28, 33.58it/s]

Ep 20000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 6


 80%|████████  | 20105/25000 [08:14<01:54, 42.67it/s]

Ep 20100/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 1


 81%|████████  | 20205/25000 [08:16<01:48, 44.30it/s]

Ep 20200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 9


 81%|████████  | 20305/25000 [08:18<01:44, 45.11it/s]

Ep 20300/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.5, First Action 1


 82%|████████▏ | 20405/25000 [08:21<01:42, 44.98it/s]

Ep 20400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 82%|████████▏ | 20505/25000 [08:23<02:01, 36.91it/s]

Ep 20500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 3


 82%|████████▏ | 20605/25000 [08:26<02:08, 34.33it/s]

Ep 20600/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 2


 83%|████████▎ | 20704/25000 [08:29<01:40, 42.89it/s]

Ep 20700/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 9


 83%|████████▎ | 20804/25000 [08:31<01:40, 41.71it/s]

Ep 20800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 1


 84%|████████▎ | 20909/25000 [08:33<01:31, 44.60it/s]

Ep 20900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 84%|████████▍ | 21009/25000 [08:35<01:29, 44.63it/s]

Ep 21000/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 9


 84%|████████▍ | 21104/25000 [08:38<01:27, 44.29it/s]

Ep 21100/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.4, First Action 9


 85%|████████▍ | 21207/25000 [08:41<01:45, 35.99it/s]

Ep 21200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 85%|████████▌ | 21306/25000 [08:43<01:34, 39.21it/s]

Ep 21300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 86%|████████▌ | 21405/25000 [08:46<01:20, 44.41it/s]

Ep 21400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.5, First Action 9


 86%|████████▌ | 21505/25000 [08:48<01:19, 43.99it/s]

Ep 21500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 9


 86%|████████▋ | 21605/25000 [08:50<01:15, 44.76it/s]

Ep 21600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9


 87%|████████▋ | 21705/25000 [08:52<01:14, 44.05it/s]

Ep 21700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 9


 87%|████████▋ | 21806/25000 [08:55<01:23, 38.29it/s]

Ep 21800/25000, Opt. Action: 5, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.9, First Action 1


 88%|████████▊ | 21906/25000 [08:58<01:31, 33.67it/s]

Ep 21900/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 1


 88%|████████▊ | 22005/25000 [09:00<01:07, 44.12it/s]

Ep 22000/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 2


 88%|████████▊ | 22105/25000 [09:03<01:05, 44.10it/s]

Ep 22100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 9


 89%|████████▉ | 22205/25000 [09:05<01:03, 43.85it/s]

Ep 22200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.6, First Action 8


 89%|████████▉ | 22305/25000 [09:07<01:00, 44.48it/s]

Ep 22300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 1


 90%|████████▉ | 22407/25000 [09:10<01:11, 36.51it/s]

Ep 22400/25000, Opt. Action: 1, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.3, First Action 5


 90%|█████████ | 22507/25000 [09:13<01:11, 34.84it/s]

Ep 22500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 6


 90%|█████████ | 22607/25000 [09:15<00:54, 44.28it/s]

Ep 22600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.1, First Action 9


 91%|█████████ | 22707/25000 [09:17<00:51, 44.73it/s]

Ep 22700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 0


 91%|█████████ | 22807/25000 [09:20<00:49, 44.07it/s]

Ep 22800/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 17.8, First Action 0


 92%|█████████▏| 22907/25000 [09:22<00:48, 43.60it/s]

Ep 22900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.9, First Action 8


 92%|█████████▏| 23005/25000 [09:24<00:54, 36.77it/s]

Ep 23000/25000, Opt. Action: 9, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.4, First Action 2


 92%|█████████▏| 23106/25000 [09:27<00:55, 33.97it/s]

Ep 23100/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.1, First Action 9


 93%|█████████▎| 23209/25000 [09:30<00:40, 44.14it/s]

Ep 23200/25000, Opt. Action: 2, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 1


 93%|█████████▎| 23304/25000 [09:32<00:38, 43.57it/s]

Ep 23300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 7


 94%|█████████▎| 23409/25000 [09:35<00:35, 44.37it/s]

Ep 23400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 94%|█████████▍| 23509/25000 [09:37<00:33, 44.26it/s]

Ep 23500/25000, Opt. Action: 3, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 0


 94%|█████████▍| 23609/25000 [09:39<00:31, 44.54it/s]

Ep 23600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 9


 95%|█████████▍| 23707/25000 [09:42<00:35, 36.36it/s]

Ep 23700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 1


 95%|█████████▌| 23803/25000 [09:44<00:32, 36.31it/s]

Ep 23800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 1


 96%|█████████▌| 23909/25000 [09:47<00:24, 43.97it/s]

Ep 23900/25000, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 7


 96%|█████████▌| 24009/25000 [09:49<00:23, 43.01it/s]

Ep 24000/25000, Opt. Action: 0, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


 96%|█████████▋| 24109/25000 [09:52<00:19, 44.66it/s]

Ep 24100/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 17.9, First Action 1


 97%|█████████▋| 24204/25000 [09:54<00:17, 44.30it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 17.0, Cumulative-Regret: 8.0, AVG100-Regret: 17.9, First Action 1


 97%|█████████▋| 24305/25000 [09:56<00:20, 33.81it/s]

Ep 24300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 3


 98%|█████████▊| 24407/25000 [09:59<00:16, 35.28it/s]

Ep 24400/25000, Opt. Action: 7, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.1, First Action 1


 98%|█████████▊| 24509/25000 [10:02<00:11, 44.54it/s]

Ep 24500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.3, First Action 5


 98%|█████████▊| 24609/25000 [10:04<00:08, 44.46it/s]

Ep 24600/25000, Opt. Action: 9, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 1


 99%|█████████▉| 24709/25000 [10:06<00:06, 43.85it/s]

Ep 24700/25000, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.0, First Action 1


 99%|█████████▉| 24809/25000 [10:09<00:04, 44.93it/s]

Ep 24800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


100%|█████████▉| 24905/25000 [10:11<00:02, 37.08it/s]

Ep 24900/25000, Opt. Action: 8, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.0, First Action 2


100%|██████████| 25000/25000 [10:13<00:00, 40.72it/s]


Ep 25000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 9

TEST:


 37%|███▋      | 112/300 [00:01<00:01, 97.08it/s]

Ep 100/300, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 2


 71%|███████▏  | 214/300 [00:02<00:00, 98.94it/s] 

Ep 200/300, Opt. Action: 6, Reward: 9.0, Cumulative-Regret: 16.0, AVG100-Regret: 18.2, First Action 9


100%|██████████| 300/300 [00:02<00:00, 109.57it/s]


Ep 300/300, Opt. Action: 1, Reward: 13.0, Cumulative-Regret: 12.0, AVG100-Regret: 18.2, First Action 1

GAMMA 1 - LR 0.1 - Entropy Decay True


  0%|          | 106/25000 [00:02<09:26, 43.97it/s]

Ep 100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  1%|          | 206/25000 [00:04<09:13, 44.79it/s]

Ep 200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  1%|          | 306/25000 [00:07<09:40, 42.57it/s]

Ep 300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


  2%|▏         | 406/25000 [00:09<09:13, 44.47it/s]

Ep 400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  2%|▏         | 507/25000 [00:12<11:05, 36.80it/s]

Ep 500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  2%|▏         | 608/25000 [00:14<10:51, 37.46it/s]

Ep 600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


  3%|▎         | 707/25000 [00:17<09:01, 44.87it/s]

Ep 700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  3%|▎         | 807/25000 [00:19<09:23, 42.91it/s]

Ep 800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  4%|▎         | 907/25000 [00:21<09:00, 44.58it/s]

Ep 900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  4%|▍         | 1007/25000 [00:23<08:55, 44.84it/s]

Ep 1000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


  4%|▍         | 1107/25000 [00:26<11:04, 35.97it/s]

Ep 1100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


  5%|▍         | 1203/25000 [00:29<12:35, 31.51it/s]

Ep 1200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


  5%|▌         | 1305/25000 [00:31<08:49, 44.74it/s]

Ep 1300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  6%|▌         | 1405/25000 [00:34<08:55, 44.03it/s]

Ep 1400/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


  6%|▌         | 1505/25000 [00:36<08:57, 43.68it/s]

Ep 1500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


  6%|▋         | 1605/25000 [00:38<08:54, 43.78it/s]

Ep 1600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


  7%|▋         | 1704/25000 [00:41<10:15, 37.86it/s]

Ep 1700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


  7%|▋         | 1805/25000 [00:43<10:36, 36.47it/s]

Ep 1800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


  8%|▊         | 1908/25000 [00:46<09:01, 42.67it/s]

Ep 1900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


  8%|▊         | 2008/25000 [00:49<08:33, 44.77it/s]

Ep 2000/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 5


  8%|▊         | 2108/25000 [00:51<08:40, 43.94it/s]

Ep 2100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


  9%|▉         | 2208/25000 [00:53<08:36, 44.16it/s]

Ep 2200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


  9%|▉         | 2308/25000 [00:55<08:38, 43.79it/s]

Ep 2300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 10%|▉         | 2407/25000 [00:58<10:16, 36.62it/s]

Ep 2400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 10%|█         | 2508/25000 [01:01<10:08, 36.99it/s]

Ep 2500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 10%|█         | 2607/25000 [01:03<08:21, 44.63it/s]

Ep 2600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 11%|█         | 2707/25000 [01:06<08:16, 44.88it/s]

Ep 2700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 11%|█         | 2807/25000 [01:08<08:27, 43.71it/s]

Ep 2800/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 12%|█▏        | 2907/25000 [01:10<08:29, 43.32it/s]

Ep 2900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 12%|█▏        | 3005/25000 [01:13<09:32, 38.41it/s]

Ep 3000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 12%|█▏        | 3105/25000 [01:15<10:43, 34.04it/s]

Ep 3100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 13%|█▎        | 3205/25000 [01:18<08:09, 44.57it/s]

Ep 3200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 13%|█▎        | 3305/25000 [01:20<08:02, 44.94it/s]

Ep 3300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 14%|█▎        | 3405/25000 [01:23<08:14, 43.70it/s]

Ep 3400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 14%|█▍        | 3505/25000 [01:25<08:16, 43.32it/s]

Ep 3500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 14%|█▍        | 3603/25000 [01:27<10:18, 34.59it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 5


 15%|█▍        | 3704/25000 [01:30<09:41, 36.65it/s]

Ep 3700/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 5


 15%|█▌        | 3807/25000 [01:33<08:08, 43.40it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 16%|█▌        | 3907/25000 [01:35<07:47, 45.13it/s]

Ep 3900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 16%|█▌        | 4007/25000 [01:37<07:56, 44.03it/s]

Ep 4000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 5


 16%|█▋        | 4107/25000 [01:40<07:50, 44.40it/s]

Ep 4100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 17%|█▋        | 4207/25000 [01:42<08:05, 42.82it/s]

Ep 4200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 17%|█▋        | 4306/25000 [01:45<09:38, 35.76it/s]

Ep 4300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 18%|█▊        | 4406/25000 [01:48<09:50, 34.87it/s]

Ep 4400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 18%|█▊        | 4504/25000 [01:50<08:04, 42.33it/s]

Ep 4500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 18%|█▊        | 4609/25000 [01:52<07:46, 43.68it/s]

Ep 4600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 19%|█▉        | 4704/25000 [01:55<07:50, 43.10it/s]

Ep 4700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 19%|█▉        | 4809/25000 [01:57<07:37, 44.10it/s]

Ep 4800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 20%|█▉        | 4903/25000 [02:00<09:18, 35.98it/s]

Ep 4900/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 16.4, First Action 5


 20%|██        | 5003/25000 [02:02<09:49, 33.94it/s]

Ep 5000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 20%|██        | 5108/25000 [02:05<07:34, 43.80it/s]

Ep 5100/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 5


 21%|██        | 5208/25000 [02:07<07:23, 44.65it/s]

Ep 5200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 21%|██        | 5308/25000 [02:10<07:18, 44.89it/s]

Ep 5300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 22%|██▏       | 5408/25000 [02:12<07:22, 44.30it/s]

Ep 5400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 22%|██▏       | 5505/25000 [02:14<09:01, 36.03it/s]

Ep 5500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 22%|██▏       | 5603/25000 [02:17<08:53, 36.35it/s]

Ep 5600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 23%|██▎       | 5707/25000 [02:20<07:18, 43.95it/s]

Ep 5700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 23%|██▎       | 5807/25000 [02:22<07:07, 44.87it/s]

Ep 5800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 24%|██▎       | 5907/25000 [02:24<07:05, 44.89it/s]

Ep 5900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 24%|██▍       | 6007/25000 [02:26<07:02, 44.95it/s]

Ep 6000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 24%|██▍       | 6104/25000 [02:29<08:35, 36.63it/s]

Ep 6100/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.0, First Action 5


 25%|██▍       | 6204/25000 [02:32<08:33, 36.61it/s]

Ep 6200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 25%|██▌       | 6307/25000 [02:34<07:12, 43.19it/s]

Ep 6300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 26%|██▌       | 6407/25000 [02:37<06:57, 44.51it/s]

Ep 6400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 26%|██▌       | 6507/25000 [02:39<06:55, 44.53it/s]

Ep 6500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 26%|██▋       | 6607/25000 [02:41<06:58, 43.96it/s]

Ep 6600/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 27%|██▋       | 6707/25000 [02:44<06:54, 44.18it/s]

Ep 6700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 27%|██▋       | 6806/25000 [02:46<08:14, 36.79it/s]

Ep 6800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.6, First Action 5


 28%|██▊       | 6907/25000 [02:49<08:13, 36.66it/s]

Ep 6900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 28%|██▊       | 7006/25000 [02:51<06:45, 44.42it/s]

Ep 7000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 28%|██▊       | 7106/25000 [02:54<06:43, 44.37it/s]

Ep 7100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 29%|██▉       | 7206/25000 [02:56<06:45, 43.88it/s]

Ep 7200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 29%|██▉       | 7306/25000 [02:58<06:39, 44.25it/s]

Ep 7300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 30%|██▉       | 7407/25000 [03:01<07:53, 37.12it/s]

Ep 7400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 30%|███       | 7507/25000 [03:04<08:18, 35.06it/s]

Ep 7500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 30%|███       | 7608/25000 [03:06<06:29, 44.66it/s]

Ep 7600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 31%|███       | 7708/25000 [03:08<06:31, 44.17it/s]

Ep 7700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 31%|███       | 7808/25000 [03:11<06:28, 44.29it/s]

Ep 7800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 32%|███▏      | 7908/25000 [03:13<06:32, 43.54it/s]

Ep 7900/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 5


 32%|███▏      | 8004/25000 [03:15<07:49, 36.23it/s]

Ep 8000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 32%|███▏      | 8104/25000 [03:18<08:07, 34.68it/s]

Ep 8100/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 33%|███▎      | 8207/25000 [03:21<06:40, 41.89it/s]

Ep 8200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 33%|███▎      | 8307/25000 [03:23<06:17, 44.25it/s]

Ep 8300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 34%|███▎      | 8407/25000 [03:26<06:12, 44.53it/s]

Ep 8400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 34%|███▍      | 8507/25000 [03:28<06:13, 44.17it/s]

Ep 8500/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 34%|███▍      | 8607/25000 [03:30<06:03, 45.15it/s]

Ep 8600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 35%|███▍      | 8706/25000 [03:33<07:31, 36.05it/s]

Ep 8700/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 5


 35%|███▌      | 8806/25000 [03:36<07:46, 34.71it/s]

Ep 8800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 36%|███▌      | 8908/25000 [03:38<06:05, 44.06it/s]

Ep 8900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 36%|███▌      | 9008/25000 [03:40<05:58, 44.61it/s]

Ep 9000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 36%|███▋      | 9108/25000 [03:43<05:59, 44.26it/s]

Ep 9100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 37%|███▋      | 9208/25000 [03:45<05:58, 44.00it/s]

Ep 9200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 37%|███▋      | 9307/25000 [03:48<06:57, 37.57it/s]

Ep 9300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 38%|███▊      | 9403/25000 [03:50<07:53, 32.95it/s]

Ep 9400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 38%|███▊      | 9506/25000 [03:53<05:58, 43.18it/s]

Ep 9500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 38%|███▊      | 9606/25000 [03:55<05:47, 44.35it/s]

Ep 9600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 39%|███▉      | 9706/25000 [03:57<05:42, 44.60it/s]

Ep 9700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 39%|███▉      | 9806/25000 [04:00<05:47, 43.72it/s]

Ep 9800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 40%|███▉      | 9904/25000 [04:02<06:46, 37.10it/s]

Ep 9900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 40%|████      | 10007/25000 [04:05<06:32, 38.25it/s]

Ep 10000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 40%|████      | 10106/25000 [04:08<06:24, 38.74it/s]

Ep 10100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 5


 41%|████      | 10205/25000 [04:10<05:34, 44.26it/s]

Ep 10200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 41%|████      | 10305/25000 [04:12<05:34, 43.87it/s]

Ep 10300/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 42%|████▏     | 10405/25000 [04:14<05:32, 43.92it/s]

Ep 10400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 42%|████▏     | 10505/25000 [04:17<05:27, 44.31it/s]

Ep 10500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 42%|████▏     | 10604/25000 [04:19<06:12, 38.65it/s]

Ep 10600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 43%|████▎     | 10704/25000 [04:22<06:55, 34.40it/s]

Ep 10700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 43%|████▎     | 10807/25000 [04:25<05:21, 44.09it/s]

Ep 10800/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 44%|████▎     | 10907/25000 [04:27<05:23, 43.60it/s]

Ep 10900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 44%|████▍     | 11007/25000 [04:29<05:12, 44.74it/s]

Ep 11000/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 44%|████▍     | 11107/25000 [04:32<05:15, 44.00it/s]

Ep 11100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 45%|████▍     | 11204/25000 [04:34<06:30, 35.33it/s]

Ep 11200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 45%|████▌     | 11303/25000 [04:37<06:44, 33.82it/s]

Ep 11300/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 46%|████▌     | 11409/25000 [04:40<05:11, 43.68it/s]

Ep 11400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 46%|████▌     | 11509/25000 [04:42<05:08, 43.76it/s]

Ep 11500/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 46%|████▋     | 11609/25000 [04:44<05:03, 44.07it/s]

Ep 11600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 47%|████▋     | 11709/25000 [04:47<04:57, 44.73it/s]

Ep 11700/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.2, First Action 5


 47%|████▋     | 11804/25000 [04:49<05:49, 37.73it/s]

Ep 11800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 48%|████▊     | 11905/25000 [04:52<05:48, 37.60it/s]

Ep 11900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 48%|████▊     | 12005/25000 [04:54<05:12, 41.65it/s]

Ep 12000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 48%|████▊     | 12105/25000 [04:57<04:53, 43.99it/s]

Ep 12100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.6, First Action 5


 49%|████▉     | 12205/25000 [04:59<04:48, 44.33it/s]

Ep 12200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 49%|████▉     | 12305/25000 [05:01<04:46, 44.32it/s]

Ep 12300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 50%|████▉     | 12405/25000 [05:03<05:11, 40.45it/s]

Ep 12400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 50%|█████     | 12506/25000 [05:06<06:09, 33.80it/s]

Ep 12500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 50%|█████     | 12608/25000 [05:09<05:32, 37.28it/s]

Ep 12600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 51%|█████     | 12707/25000 [05:11<04:30, 45.37it/s]

Ep 12700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 51%|█████     | 12807/25000 [05:14<04:34, 44.34it/s]

Ep 12800/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 19.2, First Action 5


 52%|█████▏    | 12907/25000 [05:16<04:34, 44.07it/s]

Ep 12900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 52%|█████▏    | 13007/25000 [05:18<04:33, 43.90it/s]

Ep 13000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 52%|█████▏    | 13103/25000 [05:21<05:14, 37.82it/s]

Ep 13100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 53%|█████▎    | 13206/25000 [05:24<05:36, 35.00it/s]

Ep 13200/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 53%|█████▎    | 13306/25000 [05:26<04:26, 43.82it/s]

Ep 13300/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 54%|█████▎    | 13406/25000 [05:28<04:18, 44.83it/s]

Ep 13400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 54%|█████▍    | 13506/25000 [05:31<04:18, 44.50it/s]

Ep 13500/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 54%|█████▍    | 13606/25000 [05:33<04:20, 43.79it/s]

Ep 13600/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 55%|█████▍    | 13706/25000 [05:36<05:21, 35.17it/s]

Ep 13700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 5


 55%|█████▌    | 13807/25000 [05:38<05:18, 35.18it/s]

Ep 13800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 56%|█████▌    | 13905/25000 [05:41<04:19, 42.81it/s]

Ep 13900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 56%|█████▌    | 14005/25000 [05:43<04:11, 43.72it/s]

Ep 14000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 56%|█████▋    | 14105/25000 [05:46<04:10, 43.42it/s]

Ep 14100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 57%|█████▋    | 14205/25000 [05:48<04:07, 43.59it/s]

Ep 14200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 57%|█████▋    | 14305/25000 [05:50<04:01, 44.28it/s]

Ep 14300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 58%|█████▊    | 14405/25000 [05:53<04:55, 35.87it/s]

Ep 14400/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 58%|█████▊    | 14505/25000 [05:56<05:05, 34.33it/s]

Ep 14500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 58%|█████▊    | 14607/25000 [05:58<03:59, 43.46it/s]

Ep 14600/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 5


 59%|█████▉    | 14707/25000 [06:00<03:52, 44.33it/s]

Ep 14700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 59%|█████▉    | 14807/25000 [06:03<03:51, 44.11it/s]

Ep 14800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 60%|█████▉    | 14907/25000 [06:05<03:49, 44.03it/s]

Ep 14900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 60%|██████    | 15004/25000 [06:08<04:26, 37.47it/s]

Ep 15000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 60%|██████    | 15104/25000 [06:11<04:54, 33.57it/s]

Ep 15100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 61%|██████    | 15207/25000 [06:13<03:43, 43.91it/s]

Ep 15200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 61%|██████    | 15307/25000 [06:15<03:38, 44.28it/s]

Ep 15300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 62%|██████▏   | 15407/25000 [06:18<03:44, 42.73it/s]

Ep 15400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 62%|██████▏   | 15507/25000 [06:20<03:39, 43.24it/s]

Ep 15500/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 62%|██████▏   | 15606/25000 [06:22<04:32, 34.48it/s]

Ep 15600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 63%|██████▎   | 15706/25000 [06:25<04:18, 35.99it/s]

Ep 15700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 63%|██████▎   | 15806/25000 [06:28<03:32, 43.35it/s]

Ep 15800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 64%|██████▎   | 15906/25000 [06:30<03:24, 44.46it/s]

Ep 15900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 64%|██████▍   | 16006/25000 [06:32<03:20, 44.76it/s]

Ep 16000/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.2, First Action 5


 64%|██████▍   | 16106/25000 [06:35<03:29, 42.36it/s]

Ep 16100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 65%|██████▍   | 16206/25000 [06:37<03:39, 40.10it/s]

Ep 16200/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 5


 65%|██████▌   | 16307/25000 [06:40<04:01, 36.00it/s]

Ep 16300/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 66%|██████▌   | 16405/25000 [06:43<03:39, 39.09it/s]

Ep 16400/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 5


 66%|██████▌   | 16508/25000 [06:45<03:11, 44.30it/s]

Ep 16500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 66%|██████▋   | 16608/25000 [06:47<03:09, 44.30it/s]

Ep 16600/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 67%|██████▋   | 16708/25000 [06:50<03:06, 44.48it/s]

Ep 16700/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 67%|██████▋   | 16808/25000 [06:52<03:06, 44.04it/s]

Ep 16800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


 68%|██████▊   | 16904/25000 [06:54<03:31, 38.26it/s]

Ep 16900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 68%|██████▊   | 17004/25000 [06:57<04:02, 32.98it/s]

Ep 17000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 68%|██████▊   | 17106/25000 [07:00<03:04, 42.83it/s]

Ep 17100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 69%|██████▉   | 17206/25000 [07:02<02:58, 43.75it/s]

Ep 17200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 69%|██████▉   | 17306/25000 [07:04<02:54, 44.22it/s]

Ep 17300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 70%|██████▉   | 17406/25000 [07:07<02:53, 43.86it/s]

Ep 17400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 70%|███████   | 17504/25000 [07:09<03:28, 35.93it/s]

Ep 17500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 70%|███████   | 17604/25000 [07:12<03:29, 35.39it/s]

Ep 17600/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 71%|███████   | 17706/25000 [07:15<02:53, 42.06it/s]

Ep 17700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 71%|███████   | 17806/25000 [07:17<02:42, 44.21it/s]

Ep 17800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 72%|███████▏  | 17906/25000 [07:19<02:40, 44.31it/s]

Ep 17900/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 72%|███████▏  | 18006/25000 [07:22<02:36, 44.71it/s]

Ep 18000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 72%|███████▏  | 18106/25000 [07:24<02:37, 43.79it/s]

Ep 18100/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 73%|███████▎  | 18206/25000 [07:27<03:11, 35.46it/s]

Ep 18200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 73%|███████▎  | 18303/25000 [07:29<03:22, 33.11it/s]

Ep 18300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 74%|███████▎  | 18405/25000 [07:32<02:31, 43.47it/s]

Ep 18400/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.4, First Action 5


 74%|███████▍  | 18505/25000 [07:34<02:26, 44.41it/s]

Ep 18500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 74%|███████▍  | 18605/25000 [07:36<02:24, 44.14it/s]

Ep 18600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 75%|███████▍  | 18705/25000 [07:39<02:24, 43.53it/s]

Ep 18700/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 75%|███████▌  | 18807/25000 [07:41<02:47, 36.90it/s]

Ep 18800/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 76%|███████▌  | 18907/25000 [07:44<02:58, 34.15it/s]

Ep 18900/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 76%|███████▌  | 19006/25000 [07:47<02:15, 44.16it/s]

Ep 19000/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 5


 76%|███████▋  | 19106/25000 [07:49<02:13, 44.23it/s]

Ep 19100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 77%|███████▋  | 19206/25000 [07:51<02:13, 43.27it/s]

Ep 19200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 77%|███████▋  | 19306/25000 [07:54<02:10, 43.49it/s]

Ep 19300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 78%|███████▊  | 19407/25000 [07:56<02:31, 36.82it/s]

Ep 19400/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 78%|███████▊  | 19504/25000 [07:59<02:26, 37.49it/s]

Ep 19500/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


 78%|███████▊  | 19607/25000 [08:02<02:13, 40.47it/s]

Ep 19600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


 79%|███████▉  | 19706/25000 [08:04<02:03, 42.92it/s]

Ep 19700/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 79%|███████▉  | 19806/25000 [08:06<01:59, 43.33it/s]

Ep 19800/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 80%|███████▉  | 19906/25000 [08:08<01:54, 44.37it/s]

Ep 19900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 80%|████████  | 20006/25000 [08:11<01:51, 44.67it/s]

Ep 20000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 80%|████████  | 20105/25000 [08:13<02:15, 36.14it/s]

Ep 20100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 81%|████████  | 20205/25000 [08:16<02:20, 34.21it/s]

Ep 20200/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.4, First Action 5


 81%|████████  | 20307/25000 [08:19<01:47, 43.63it/s]

Ep 20300/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 82%|████████▏ | 20407/25000 [08:21<01:44, 43.80it/s]

Ep 20400/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 82%|████████▏ | 20507/25000 [08:23<01:41, 44.11it/s]

Ep 20500/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 82%|████████▏ | 20607/25000 [08:26<01:39, 44.19it/s]

Ep 20600/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 83%|████████▎ | 20706/25000 [08:28<02:03, 34.76it/s]

Ep 20700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.2, First Action 5


 83%|████████▎ | 20803/25000 [08:31<01:58, 35.52it/s]

Ep 20800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 84%|████████▎ | 20908/25000 [08:34<01:34, 43.44it/s]

Ep 20900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 84%|████████▍ | 21008/25000 [08:36<01:33, 42.63it/s]

Ep 21000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 84%|████████▍ | 21108/25000 [08:38<01:27, 44.45it/s]

Ep 21100/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 85%|████████▍ | 21208/25000 [08:41<01:26, 43.77it/s]

Ep 21200/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 85%|████████▌ | 21303/25000 [08:43<01:39, 37.17it/s]

Ep 21300/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 86%|████████▌ | 21404/25000 [08:46<01:35, 37.54it/s]

Ep 21400/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 86%|████████▌ | 21506/25000 [08:49<01:34, 36.80it/s]

Ep 21500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 86%|████████▋ | 21605/25000 [08:51<01:17, 43.85it/s]

Ep 21600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 87%|████████▋ | 21705/25000 [08:53<01:15, 43.64it/s]

Ep 21700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 87%|████████▋ | 21805/25000 [08:55<01:13, 43.45it/s]

Ep 21800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


 88%|████████▊ | 21905/25000 [08:58<01:10, 44.13it/s]

Ep 21900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 88%|████████▊ | 22007/25000 [09:00<01:18, 38.31it/s]

Ep 22000/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 88%|████████▊ | 22107/25000 [09:03<01:24, 34.06it/s]

Ep 22100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 89%|████████▉ | 22203/25000 [09:06<01:03, 44.05it/s]

Ep 22200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 89%|████████▉ | 22308/25000 [09:08<01:01, 43.82it/s]

Ep 22300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 90%|████████▉ | 22408/25000 [09:10<00:58, 44.21it/s]

Ep 22400/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5


 90%|█████████ | 22508/25000 [09:13<00:56, 44.30it/s]

Ep 22500/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 5


 90%|█████████ | 22605/25000 [09:15<01:06, 36.14it/s]

Ep 22600/25000, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.2, First Action 5


 91%|█████████ | 22704/25000 [09:18<01:05, 34.81it/s]

Ep 22700/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


 91%|█████████ | 22806/25000 [09:21<00:51, 42.36it/s]

Ep 22800/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 92%|█████████▏| 22906/25000 [09:23<00:48, 42.99it/s]

Ep 22900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 92%|█████████▏| 23006/25000 [09:25<00:45, 43.73it/s]

Ep 23000/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 5


 92%|█████████▏| 23106/25000 [09:27<00:42, 44.32it/s]

Ep 23100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 93%|█████████▎| 23206/25000 [09:30<00:41, 43.53it/s]

Ep 23200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 93%|█████████▎| 23305/25000 [09:32<00:44, 38.26it/s]

Ep 23300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 94%|█████████▎| 23405/25000 [09:35<00:49, 32.55it/s]

Ep 23400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 94%|█████████▍| 23508/25000 [09:38<00:34, 43.41it/s]

Ep 23500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 94%|█████████▍| 23608/25000 [09:40<00:31, 44.07it/s]

Ep 23600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 95%|█████████▍| 23708/25000 [09:42<00:29, 43.80it/s]

Ep 23700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


 95%|█████████▌| 23808/25000 [09:45<00:27, 43.16it/s]

Ep 23800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.4, First Action 5


 96%|█████████▌| 23904/25000 [09:47<00:30, 35.90it/s]

Ep 23900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 96%|█████████▌| 24006/25000 [09:50<00:29, 33.68it/s]

Ep 24000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 96%|█████████▋| 24107/25000 [09:53<00:20, 44.27it/s]

Ep 24100/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 97%|█████████▋| 24207/25000 [09:55<00:18, 43.57it/s]

Ep 24200/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 5


 97%|█████████▋| 24307/25000 [09:57<00:15, 44.08it/s]

Ep 24300/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 5


 98%|█████████▊| 24407/25000 [10:00<00:13, 44.30it/s]

Ep 24400/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 5


 98%|█████████▊| 24506/25000 [10:02<00:13, 36.54it/s]

Ep 24500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


 98%|█████████▊| 24606/25000 [10:05<00:11, 35.21it/s]

Ep 24600/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 5


 99%|█████████▉| 24707/25000 [10:08<00:07, 41.37it/s]

Ep 24700/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 5


 99%|█████████▉| 24807/25000 [10:10<00:04, 43.48it/s]

Ep 24800/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 5


100%|█████████▉| 24907/25000 [10:12<00:02, 44.38it/s]

Ep 24900/25000, Opt. Action: 8, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 5


100%|██████████| 25000/25000 [10:14<00:00, 40.66it/s]


Ep 25000/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5

TEST:


 41%|████      | 123/300 [00:00<00:01, 137.81it/s]

Ep 100/300, Opt. Action: 5, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 5


 75%|███████▍  | 224/300 [00:01<00:00, 138.85it/s]

Ep 200/300, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 5


100%|██████████| 300/300 [00:02<00:00, 133.27it/s]


Ep 300/300, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 5

GAMMA 0.9 - LR 0.1 - Entropy Decay True


  0%|          | 104/25000 [00:02<11:26, 36.27it/s]

Ep 100/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


  1%|          | 204/25000 [00:05<12:17, 33.61it/s]

Ep 200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


  1%|          | 309/25000 [00:08<09:20, 44.08it/s]

Ep 300/25000, Opt. Action: 1, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  2%|▏         | 404/25000 [00:10<09:21, 43.80it/s]

Ep 400/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


  2%|▏         | 504/25000 [00:12<09:20, 43.69it/s]

Ep 500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 19.0, First Action 8


  2%|▏         | 609/25000 [00:15<09:05, 44.72it/s]

Ep 600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


  3%|▎         | 705/25000 [00:17<10:22, 39.03it/s]

Ep 700/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.6, First Action 8


  3%|▎         | 806/25000 [00:20<11:34, 34.83it/s]

Ep 800/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


  4%|▎         | 908/25000 [00:22<09:11, 43.68it/s]

Ep 900/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.0, First Action 8


  4%|▍         | 1008/25000 [00:25<09:17, 43.01it/s]

Ep 1000/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 16.8, First Action 8


  4%|▍         | 1108/25000 [00:27<09:02, 44.05it/s]

Ep 1100/25000, Opt. Action: 3, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


  5%|▍         | 1208/25000 [00:29<09:00, 44.04it/s]

Ep 1200/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  5%|▌         | 1304/25000 [00:32<10:53, 36.29it/s]

Ep 1300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  6%|▌         | 1406/25000 [00:34<10:57, 35.90it/s]

Ep 1400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


  6%|▌         | 1507/25000 [00:37<09:33, 40.97it/s]

Ep 1500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 8


  6%|▋         | 1607/25000 [00:40<08:52, 43.96it/s]

Ep 1600/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  7%|▋         | 1707/25000 [00:42<08:35, 45.18it/s]

Ep 1700/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


  7%|▋         | 1807/25000 [00:44<08:59, 43.02it/s]

Ep 1800/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


  8%|▊         | 1907/25000 [00:47<10:06, 38.06it/s]

Ep 1900/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  8%|▊         | 2007/25000 [00:50<10:23, 36.89it/s]

Ep 2000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


  8%|▊         | 2107/25000 [00:52<11:00, 34.67it/s]

Ep 2100/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


  9%|▉         | 2208/25000 [00:55<08:29, 44.72it/s]

Ep 2200/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


  9%|▉         | 2308/25000 [00:57<08:55, 42.37it/s]

Ep 2300/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 10%|▉         | 2408/25000 [00:59<08:50, 42.60it/s]

Ep 2400/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 10%|█         | 2508/25000 [01:02<08:34, 43.72it/s]

Ep 2500/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 8


 10%|█         | 2606/25000 [01:04<09:47, 38.11it/s]

Ep 2600/25000, Opt. Action: 9, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 11%|█         | 2706/25000 [01:07<10:49, 34.30it/s]

Ep 2700/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 11%|█         | 2806/25000 [01:10<08:21, 44.25it/s]

Ep 2800/25000, Opt. Action: 2, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 12%|█▏        | 2906/25000 [01:12<08:18, 44.28it/s]

Ep 2900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 12%|█▏        | 3006/25000 [01:14<08:20, 43.97it/s]

Ep 3000/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 12%|█▏        | 3106/25000 [01:17<08:20, 43.75it/s]

Ep 3100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 13%|█▎        | 3205/25000 [01:19<09:36, 37.79it/s]

Ep 3200/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 13%|█▎        | 3306/25000 [01:22<09:32, 37.87it/s]

Ep 3300/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 14%|█▎        | 3406/25000 [01:25<10:51, 33.12it/s]

Ep 3400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.4, First Action 8


 14%|█▍        | 3505/25000 [01:27<08:14, 43.50it/s]

Ep 3500/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 14%|█▍        | 3605/25000 [01:29<08:07, 43.87it/s]

Ep 3600/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 15%|█▍        | 3705/25000 [01:32<08:06, 43.77it/s]

Ep 3700/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.4, First Action 8


 15%|█▌        | 3805/25000 [01:34<08:15, 42.76it/s]

Ep 3800/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 18.0, First Action 8


 16%|█▌        | 3904/25000 [01:37<09:33, 36.81it/s]

Ep 3900/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 16%|█▌        | 4004/25000 [01:39<11:08, 31.43it/s]

Ep 4000/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 16%|█▋        | 4109/25000 [01:42<07:56, 43.85it/s]

Ep 4100/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 17%|█▋        | 4204/25000 [01:44<07:49, 44.28it/s]

Ep 4200/25000, Opt. Action: 6, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 17%|█▋        | 4304/25000 [01:47<08:07, 42.48it/s]

Ep 4300/25000, Opt. Action: 8, Reward: 25.0, Cumulative-Regret: 0.0, AVG100-Regret: 17.8, First Action 8


 18%|█▊        | 4409/25000 [01:49<07:52, 43.61it/s]

Ep 4400/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.8, First Action 8


 18%|█▊        | 4507/25000 [01:52<09:32, 35.82it/s]

Ep 4500/25000, Opt. Action: 7, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.0, First Action 8


 18%|█▊        | 4607/25000 [01:54<08:51, 38.37it/s]

Ep 4600/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.6, First Action 8


 19%|█▉        | 4707/25000 [01:57<08:10, 41.34it/s]

Ep 4700/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.2, First Action 8


 19%|█▉        | 4807/25000 [01:59<07:59, 42.12it/s]

Ep 4800/25000, Opt. Action: 4, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.2, First Action 8


 20%|█▉        | 4907/25000 [02:02<07:49, 42.83it/s]

Ep 4900/25000, Opt. Action: 0, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 18.8, First Action 8


 20%|██        | 5007/25000 [02:04<07:27, 44.67it/s]

Ep 5000/25000, Opt. Action: 5, Reward: 5.0, Cumulative-Regret: 20.0, AVG100-Regret: 17.6, First Action 8


 20%|██        | 5027/25000 [02:05<07:40, 43.33it/s]