In [0]:
import queue
import random
import torch
import copy
import os
import sys
import math
import argparse
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.utils.data
from torch import optim
from torch import nn
from torch import distributions as dist
from enum import Enum
import warnings
warnings.filterwarnings('ignore')

Gaussian and BNN Initialisation

In [0]:
# Load device 
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using device: {}'.format(device))

# Mixture parameters for Gaussian
PI = 0.5

SIG_1 = torch.tensor([math.exp(-0)]) # sigma1
SIG_2 = torch.tensor([math.exp(-6)]) # sigma2

# Place tensor in GPU if device is cuda
if torch.cuda.is_available():
  SIG_1 = SIG_1.cuda()
  SIG_2 = SIG_2.cuda()

# Default parameters for Gaussian
Mu_PRI = 0 # mu of prior
SIG_PRI = torch.tensor([math.exp(-0)]) # sigma of prior

# place tensor in GPU if device is cuda
if torch.cuda.is_available():
    SIG_PRI = SIG_PRI.cuda()

# Initial weight hyperparameters
Mu_W = (-0.03, 0.03)
Rho_W = (-8, -7)
Mu_B = (-0.03, 0.03)
Rho_B = (-8, -7)

# Loss variance
SIGMA = torch.tensor([math.exp(-2)])

# place tensor in GPU if device is cuda 
if torch.cuda.is_available():
    SIGMA = SIGMA.cuda()

class PriorType(Enum):
  MIXTURE = 1
  GAUSSIAN = 2

class ActivationType(Enum):
  NONE = 0
  RELU = 1
  SOFTMAX = 2
  TANH = 3
  SIGMOID = 4

class TaskType(Enum):
  REGRESSION = 1
  CLASSIFICATION = 2



Using device: cpu


In [0]:
class GaussianMixture(object):

  def __init__(self, pi, sigma1, sigma2):
    self.pi = pi
    self.sigma1 = sigma1
    self.sigma2 = sigma2

  
  def log_prob(self, weights):
    new_weights = weights.view(-1)
    normal_den1 = dist.Normal(0,self.sigma1).log_prob(new_weights)
    exp_normal_den1 = torch.exp(normal_den1)
    exp_normal_den2 = torch.exp(
        dist.Normal(0.0, self.sigma2).log_prob(new_weights))
    nonzero = exp_normal_den2.nonzero()
    zero = (exp_normal_den2==0).nonzero()
    sum_log_prob = torch.sum(torch.log(self.pi * torch.take(exp_normal_den1,nonzero) \
                  + (1-self.pi)*torch.take(exp_normal_den2,nonzero))) \
                  + torch.sum(torch.take(normal_den1, zero)+np.log(self.pi))
    return sum_log_prob

BNN Definition

In [0]:
class BayesLayer(nn.Module):

  def __init__(self,
               input_size,
               output_size,
               prior_type=PriorType.MIXTURE,
               prior_params={'pi' : PI, 'sigma1' : SIG_1, 'sigma2' : SIG_2},
               activation_type=ActivationType.NONE,
               init_mu_weights=Mu_W,
               init_mu_bias=Mu_B,
               init_rho_weights=Rho_W,
               init_rho_bias=Rho_B
              ):
    super().__init__()
    self.input_size = input_size
    self.output_size = output_size
    self.activation_type = activation_type

    # torch variables
    if not torch.cuda.is_available():
        self.mu_weights = nn.Parameter(torch.Tensor(output_size, input_size))
        self.rho_weights = nn.Parameter(torch.Tensor(output_size, input_size))
        self.mu_bias = nn.Parameter(torch.Tensor(output_size))
        self.rho_bias = nn.Parameter(torch.Tensor(output_size))
        self.normal_dist = dist.Normal(torch.Tensor([0]), torch.Tensor([1]))
    else:
        self.mu_weights = nn.Parameter(torch.Tensor(output_size, input_size).cuda())
        self.rho_weights = nn.Parameter(torch.Tensor(output_size, input_size).cuda())
        self.mu_bias = nn.Parameter(torch.Tensor(output_size).cuda())
        self.rho_bias = nn.Parameter(torch.Tensor(output_size).cuda())
        self.normal_dist = dist.Normal(torch.Tensor([0]).cuda(), torch.Tensor([1]).cuda())

    # initialize variables
    self.mu_weights.data.uniform_(*init_mu_weights)
    self.rho_weights.data.uniform_(*init_rho_weights)
    self.mu_bias.data.uniform_(*init_mu_bias)
    self.rho_bias.data.uniform_(*init_rho_bias)

    if prior_type == PriorType.MIXTURE:
      self.prior_weights = GaussianMixture(
          prior_params['pi'], prior_params['sigma1'], prior_params['sigma2'])
      self.prior_bias = GaussianMixture(
          prior_params['pi'], prior_params['sigma1'], prior_params['sigma2'])
    else:
      self.prior_weights = dist.Normal(prior_params['mean'],
                                       prior_params['sigma'])
      self.prior_bias = dist.Normal(prior_params['mean'],
                                    prior_params['sigma'])
    self.log_prior = 0
    self.log_posterior = 0

  def gaussian_sample(self, mu, rho):
    epsilon = self.normal_dist.sample(rho.size()).squeeze(-1)
    return mu + torch.log(1 + torch.exp(rho)) * epsilon

  def forward(self, input_data, sample=False, debug=False, 
              avg_weights=False, avg_weight_count=2):

    if self.training or sample:
      
       
      # averaging weights
      if avg_weights:
        sum_weights = torch.zeros_like(self.mu_weights)
        sum_bias = torch.zeros_like(self.mu_bias)
        for _ in range(avg_weight_count):
           sum_weights += self.gaussian_sample(
             self.mu_weights, self.rho_weights)
           sum_bias += self.gaussian_sample(
             self.mu_bias, self.rho_bias)
        weights = sum_weights / avg_weight_count
        bias = sum_bias / avg_weight_count
      else:     
        weights = self.gaussian_sample(
          self.mu_weights, self.rho_weights)
        bias = self.gaussian_sample(
          self.mu_bias, self.rho_bias)

      self.log_prior = (self.prior_weights.log_prob(weights).sum() +
                        self.prior_bias.log_prob(bias).sum() )
      sigma_weights = torch.log(1 + torch.exp(self.rho_weights))
      sigma_bias = torch.log(1 + torch.exp(self.rho_bias))
      self.log_posterior = (
          dist.Normal(
              self.mu_weights, sigma_weights).log_prob(weights).sum() +
          dist.Normal(self.mu_bias, sigma_bias).log_prob(bias).sum()
      )

      if torch.isnan(self.log_posterior):
        print('Oops, nan in log_posterior')
        print('log_posterior for weights is {}'.format(dist.Normal(
          self.mu_weights, sigma_weights).log_prob(weights).sum()))
        print('log_posterior for bias is {}'.format(dist.Normal(
          self.mu_bias, sigma_bias).log_prob(bias).sum()))
    else:
      weights = self.mu_weights
      bias = self.mu_bias

    linear_output = nn.functional.linear(input_data, weights, bias)
    output = linear_output
    if self.activation_type == ActivationType.RELU:
      output = torch.relu(linear_output)
    elif self.activation_type == ActivationType.SOFTMAX:
      output = torch.log_softmax(linear_output, dim=1)
    elif self.activation_type == ActivationType.SIGMOID:
      output = torch.sigmoid(linear_output)
    elif self.activation_type == ActivationType.TANH:
      output = torch.tanh(linear_output)
    elif self.activation_type == ActivationType.NONE:
      output = linear_output
    else:
      raise ValueError('activation_type {} not support'.format(self.activation_type))
    return output

  def extra_repr(self):
    return 'Bayes Layer, in_size:{}, out_size:{}, activation_type:{}'.format(
      self.input_size, self.output_size, self.activation_type.name
    )

In [0]:
class BayesNN(nn.Module):

  def __init__(
      self,
      nn_input_size,
      layer_config=[100, 100, 10],           
      activation_config=[ActivationType.RELU, ActivationType.RELU, ActivationType.NONE],
      prior_type=PriorType.MIXTURE,
      prior_params={'pi' : PI, 'sigma1' : SIG_1, 'sigma2' : SIG_2},
      task_type=TaskType.REGRESSION,         
      init_mu_weights=Mu_W,
      init_mu_bias=Mu_B,
      init_rho_weights=Rho_W,
      init_rho_bias=Rho_B
  ):
    super().__init__()

    self.layers = nn.ModuleList([]) 
    self.input_size = nn_input_size
    for i, output_size in enumerate(layer_config):
      if i == 0:
        input_size = self.input_size
      else:
        input_size = layer_config[i-1]

      bayes_layer = BayesLayer(input_size, output_size,
                                     activation_type = activation_config[i],
                                     prior_type=prior_type,
                                     prior_params=prior_params,
                                     init_mu_weights=init_mu_weights,
                                     init_mu_bias=init_mu_bias,
                                     init_rho_weights=init_rho_weights,
                                     init_rho_bias=init_rho_bias)
      self.layers.append(bayes_layer)
    self.output_size = self.layers[-1].output_size
    self.task_type = task_type

  def forward(self, input_data, sample=True, debug=False,
              avg_weights=False, avg_weight_count=2):
    current_data = input_data
    for layer in self.layers:
      current_data = layer.forward(current_data, sample, debug=debug,
                                   avg_weights=avg_weights,
                                   avg_weight_count=avg_weight_count)
    if sample is False:
        print("not sampling.")
    return current_data

  # sample a bunch weights
  # make predictions 
  # output averaged predictions 
  def predict_by_sampling(self, input_data, num_samples=1):

    # reduce the use of buffer
    with torch.no_grad():
        outputs = torch.empty(num_samples, input_data.size()[0], self.output_size)
        for i in range(num_samples):
            
            outputs[i] = self.forward(input_data, sample=True, debug=True)
            
        stds = outputs.std(0)
        outputs = outputs.mean(0)
    return outputs

  def log_prior(self):
    log_prior = 0
    for layer in self.layers:
      log_prior += layer.log_prior
    return log_prior

  def log_posterior(self):
    log_posterior = 0
    for layer in self.layers:
      log_posterior += layer.log_posterior
    return log_posterior

  def cost_function(self, inputs, targets, num_samples, ratio):
    sum_log_posterior = 0
    sum_log_prior = 0
    sum_neg_log_likeli = 0
    for _ in range(num_samples):
      outputs = self(inputs, sample=True)
      sum_log_posterior += self.log_posterior()
      sum_log_prior += self.log_prior()
      if self.task_type == TaskType.CLASSIFICATION:

         # log softmax active function
         log_probs = outputs[range(targets.size()[0]), targets]

         
         
         neg_log_likeli = -log_probs.sum()
      elif self.task_type == TaskType.REGRESSION:
         neg_log_likeli = - dist.Normal(
             targets, SIGMA).log_prob(outputs).sum()
      sum_neg_log_likeli += neg_log_likeli
    kl_divergence = (sum_log_posterior / num_samples - sum_log_prior / num_samples) * ratio
    neg_log_likeli = sum_neg_log_likeli / num_samples
    loss =  kl_divergence + neg_log_likeli
    return loss, kl_divergence, neg_log_likeli

  def extra_repr(self):
    repr = ''
    for layer in self.layers:
      repr += layer.extra_repr()
      repr += '\n'
    return repr

Definition of components in Experiment

In [0]:

SAMPLE_NUM = 2
ACTIONS_NUM = 2
BUFFER = 4096



In [0]:
class Agent(object):  
  def __init__(self):
    # last 4096 interactions with the Mushroom bandit
    self.pre_context = []
    self.pre_action = []
    self.pre_reward = []
    self.value_estimates = None
    
    
  def collect_data(self):
    return len(self.pre_context)

  def select_action(self, context, logs):
    pass
  def variational_posterior_update(self, logs):
    pass
  def update_buffer(self, context, action, reward):
    self.pre_context.append(context)
    self.pre_action.append(action)
    self.pre_reward.append(reward)
    if len(self.pre_context) == BUFFER:
      self.pre_context = self.pre_context[1:]
      self.pre_action = self.pre_action[1:]
      self.pre_reward = self.pre_reward[1:]

In [0]:
class EpsGrAgent(Agent):
  def __init__(self, epsilon, optimizer_constructor=torch.optim.Adam, optim_params={'lr':1e-3, 'eps':0.01}):
    super().__init__()
    self.epsilon = epsilon
    self.value_estimates =  nn.Sequential(
        torch.nn.Linear(118, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 100),
        torch.nn.ReLU(),
        torch.nn.Linear(100, 1),
    )
    
    self.loss_fn = nn.MSELoss()
    
    if torch.cuda.is_available():
      self.value_estimates = self.value_estimates.cuda()

    self.optimizer = optimizer_constructor(self.value_estimates.parameters(), **optim_params)
    
    
  def select_action(self, context, logs=False):
    if random.random() < self.epsilon:
      return random.randint(0,ACTIONS_NUM-1)
    
    max_reward = -36
    argmax_action = -1
    for action in range(ACTIONS_NUM):
      estimated_reward = 0
      
        
      action_tensor = torch.tensor([[action]], dtype=torch.float)
        
      context_and_action = torch.cat(
          [context, action_tensor], dim=1)
        
      if torch.cuda.is_available():
        context_and_action = context_and_action.cuda()
        
      estimated_reward = self.value_estimates(context_and_action)
      if logs:
        print('chosen action {} - estimated reward: {}'.format(
            action, estimated_reward))
      if estimated_reward > max_reward:
        max_reward = estimated_reward
        argmax_action = action
    return argmax_action

  
  def variational_posterior_update(self, logs=False):
    features = []
    for context, action in zip(iter(self.pre_context),
                               iter(self.pre_action)):
      
      action_tensor = torch.tensor([[action]], dtype=torch.float)
      
      features.append(torch.cat(
          [context, action_tensor], dim=1))
    features = torch.cat(features)
    
    rewards = torch.tensor(self.pre_reward, dtype=torch.float)

    pre_set = torch.utils.data.TensorDataset(features, rewards)
    pre_loader = torch.utils.data.DataLoader(
        pre_set, batch_size=64, shuffle=True, num_workers=4)
    
    avg_loss = 0
    
    for i, data in enumerate(pre_loader):
      inputs, labels = data
      if torch.cuda.is_available():
        inputs = inputs.cuda()
        labels = labels.cuda()
      # zero the parameter gradients
      self.optimizer.zero_grad()

      # forward 
      # backward 
      # optimize
      loss = self.loss_fn(self.value_estimates(inputs).squeeze(), labels)

      loss.backward()
      self.optimizer.step()
     
      avg_loss += loss
      
    avg_loss /= len(pre_loader.dataset)
    
    if logs:
      print('Loss: {}'.format(avg_loss))
    return avg_loss    

In [0]:
class AgentBNN(Agent):
  
  def __init__(self, optimizer_constructor=torch.optim.Adam, 
               optim_params={'lr':1e-3, 'eps':0.01},
               prior_params=None,
               lr_scheduler_step_size=32,
               lr_scheduler_gamma=0.1,
               avg_weights=False,
               avg_weights_count=2,
               init_mu_weights=[-0.5, 0.5],
               init_mu_bias=[-0.5, 0.5],
               init_rho_weights=[-4, -2],
               init_rho_bias=[-4, -2]):
        
    super().__init__()
    
    # Estimating[reward | context, action]
    
    bayes_params = {'nn_input_size': 118,
                  'layer_config': [100, 100, 1],
                  'activation_config': [ActivationType.RELU, 
                                        ActivationType.RELU, ActivationType.NONE],
                  'init_mu_weights': init_mu_weights,
                  'init_mu_bias': init_mu_bias,
                  'init_rho_weights': init_rho_weights,
                  'init_rho_bias': init_rho_bias
                 }
    
    if prior_params is not None:
        bayes_params['prior_params'] = prior_params
    
    self.value_estimates = BayesNN(**bayes_params)
        
    if torch.cuda.is_available():
      self.value_estimates = self.value_estimates.cuda()
    
    self.optimizer = optimizer_constructor(
      self.value_estimates.parameters(), **optim_params)
    self.scheduler = torch.optim.lr_scheduler.StepLR(
      self.optimizer, step_size=lr_scheduler_step_size, 
      gamma=lr_scheduler_gamma)

    self.avg_weights = avg_weights
    self.avg_weights_count = avg_weights_count
  

  def select_action(self, context, logs=False):
    self.value_estimates.train()
    max_reward = -36
    argmax_action = -1
    for action in range(ACTIONS_NUM):
      expected_reward = 0
      if self.avg_weights:
        sample_count = 1
      else:
        sample_count = SAMPLE_NUM
      for i in range(sample_count):
        
        action_tensor = torch.tensor([[action]], dtype=torch.float)
        
        context_and_action = torch.cat(
            [context, action_tensor], dim=1)
        
        if torch.cuda.is_available():
          context_and_action = context_and_action.cuda()
        
        expected_reward += self.value_estimates(
          context_and_action, avg_weights=self.avg_weights,
          avg_weight_count=self.avg_weights_count)
      expected_reward /= sample_count
      if logs:
        print('chosen action {} - estimated reward: {}'.format(
            action, expected_reward))
      if expected_reward > max_reward:
        max_reward = expected_reward
        argmax_action = action
    return argmax_action
  

  def variational_posterior_update(self, logs=False):
    features = []
    for context, action in zip(iter(self.pre_context),
                               iter(self.pre_action)):
      
      action_tensor = torch.tensor([[action]], dtype=torch.float)
      
      features.append(torch.cat(
          [context, action_tensor], dim=1))
    features = torch.cat(features)
    
    rewards = torch.tensor(self.pre_reward, dtype=torch.float)

    pre_set = torch.utils.data.TensorDataset(features, rewards)
    pre_loader = torch.utils.data.DataLoader(
        pre_set, batch_size=64, shuffle=True, num_workers=4)
    
    avg_loss = 0
    avg_kl_divergence = 0
    avg_nll = 0
    
    for i, data in enumerate(pre_loader):
      inputs, labels = data
      if torch.cuda.is_available():
        inputs = inputs.cuda()
        labels = labels.cuda()

      # zero gradients parameter 
      self.optimizer.zero_grad()

      # forward 
      # backward 
      # optimize
      loss, kl_divergence, nll = self.value_estimates.cost_function(
          inputs, labels, num_samples=2, ratio=1/len(pre_loader))
      loss.backward()
      self.optimizer.step()
      torch.nn.utils.clip_grad_norm_(self.value_estimates.parameters(),
                                     4)
     
      avg_loss += loss
      avg_kl_divergence += kl_divergence
      avg_nll += nll
      
    dataset_len = len(pre_loader.dataset)  
    avg_loss /= dataset_len
    avg_kl_divergence /= dataset_len
    avg_nll /= dataset_len
    self.scheduler.step()
    
    if logs:
      print('Loss: {}'.format(avg_loss))
      print('Kullback div: {}'.format(avg_kl_divergence))
    return avg_loss.item(), avg_kl_divergence.item(), avg_nll.item()

In [0]:
class Environment(object):
  
  def __init__(self, agent, dataloader, name):
    self.agent = agent
    self.dataloader = dataloader
    self.name = name
    self.cumulative_regret = 0
  
  def play_round(self, logs=False):
    
    # Get the contexts
    #label random mushroom
    context, mush_type = next(iter(self.dataloader))
    
    if torch.cuda.is_available():
      mush_type = mush_type.cuda()
    
    # Reward defenition 
    if mush_type == 1:
      eat_reward = 5
      oracle_reward = 5
      mush_string = 'Edible'
    else: 
      oracle_reward = 0
      mush_string = 'Poisonous'
      random_draw = random.random()
      if random_draw > 0.5:
        eat_reward = 5
      else:
        eat_reward = -35
        
    # selecting action in repect of context
    selected_action = self.agent.select_action(context, logs)
    
    # Reward of Action
    if selected_action == 0: #pass
      action_string = 'passing'
      reward = 0
    else: #eat
      action_string = 'eating'
      reward = eat_reward


    # Cumulative Regret
    self.cumulative_regret += max(oracle_reward - reward, 0) 
    
    if logs:
      print('{} mushroom. Agent select {}. Received a reward: {}.'.format(mush_string, action_string, reward))
      print('Cumulative sum of regret {}'.format(self.cumulative_regret))
      
    # Send reward back to the agent
    self.agent.update_buffer(context, selected_action, reward)

    
    update_results = self.agent.variational_posterior_update(logs)
    
    return update_results

Our Experiment

In [0]:
def read_args(args=None):
  parser = argparse.ArgumentParser()
  parser.add_argument('--optimizer_type', type=str, default='SGD')
  parser.add_argument('--eps_learning_rate', type=float, default=1e-3)
  parser.add_argument('--eps_epsilon', type=float, default=1e-3)
  parser.add_argument('--eps_momentum', type=float, default=0)
  parser.add_argument('--bayes_learning_rate', type=float, default=2e-7)
  parser.add_argument('--bayes_epsilon', type=float, default=1e-3)
  parser.add_argument('--bayes_momentum', type=float, default=0)
  parser.add_argument('--bayes_lr_scheduler_step_size', type=int, default=10000)
  parser.add_argument('--bayes_lr_scheduler_gamma', type=float, default=0.1)
  parser.add_argument('--bayes_pi', type=float, default=0.5 )
  parser.add_argument('--bayes_log_sigma1', type=float, default=math.exp(-3))
  parser.add_argument('--bayes_log_sigma2', type=float, default=math.exp(-8))
  parser.add_argument('--avg_weights', dest='avg_weights', action='store_true')
  parser.add_argument('--avg_weights_count', type=int, default=2)
  parser.add_argument('--init_mu_weights_range', type=float, nargs=2, default=[-0.05, 0.05])
  parser.add_argument('--init_mu_bias_range', type=float, nargs=2, default=[-0.05, 0.05])
  parser.add_argument('--init_rho_weights_range', type=float, nargs=2, default=[-7, -6])
  parser.add_argument('--init_rho_bias_range', type=float, nargs=2, default=[-7, -6])
  parser.add_argument('--number_of_runs', type=int, default=4)
  args = parser.parse_args(args=[])
  return args


if __name__ == '__main__':

  mush_dataset = pd.read_csv('mushrooms.csv')
  train_labels = mush_dataset['class']
  train_labels = train_labels.replace(['p', 'e'],
                                [-1, 1])
  train_features = pd.get_dummies(mush_dataset.drop(['class'], axis=1))

  train_features = torch.tensor(train_features.values, dtype=torch.float)
  train_labels = torch.tensor(train_labels.values)

  trainset = torch.utils.data.TensorDataset(train_features, train_labels)
  trainloader = torch.utils.data.DataLoader(trainset, batch_size=1,
                                            shuffle=True, num_workers=0)
    
	# Parameters for bayesNN and epsilon greedy agents
  args = read_args(sys.argv)
  
  if args.optimizer_type == 'Adam':
    optimizer_constructor = torch.optim.Adam
    epsg_optimizer_params = {'lr': args.eps_learning_rate,
                           'eps': args.eps_epsilon}
    bayes_optimizer_params = {'lr': args.bayes_learning_rate,
                            'eps': args.bayes_epsilon}
  elif args.optimizer_type == 'SGD':
    optimizer_constructor = torch.optim.SGD
    eps_optimizer_params = {'lr': args.eps_learning_rate,
                           'momentum': args.eps_momentum}
    bayes_optimizer_params = {'lr': args.bayes_learning_rate, 
                            'momentum': args.bayes_momentum}
  
  sigma1 = math.exp(args.bayes_log_sigma1)
  sigma2 = math.exp(args.bayes_log_sigma2)
  prior_params = {'pi': args.bayes_pi, 
                  'sigma1': sigma1,
                  'sigma2': sigma2}

  bayes_envs = []
  eps5_envs = []
  eps1_envs = []
  eps0_envs = []
  envs = []

  for j in range(args.number_of_runs):

    bayes_agent = AgentBNN(optimizer_constructor=optimizer_constructor,
                  optim_params=bayes_optimizer_params,
                  prior_params=prior_params,
                  lr_scheduler_step_size=args.bayes_lr_scheduler_step_size,
                  lr_scheduler_gamma=args.bayes_lr_scheduler_gamma,
                  avg_weights=args.avg_weights,
                  avg_weights_count=args.avg_weights_count,
                  init_mu_weights=args.init_mu_weights_range,
                  init_mu_bias=args.init_mu_bias_range,
                  init_rho_weights=args.init_rho_weights_range,
                  init_rho_bias=args.init_rho_bias_range)
    bayes_env = Environment(bayes_agent, trainloader,
                          'Bayes by Backprop')

    eps5_agent = EpsGrAgent(epsilon=.05, 
                        optimizer_constructor=optimizer_constructor,
                        optim_params=eps_optimizer_params)
    eps5_env = Environment(eps5_agent, copy.deepcopy(trainloader),
                          '5% Ɛ-Greedy')

    eps1_agent = EpsGrAgent(epsilon=.01, 
                        optimizer_constructor=optimizer_constructor,
                        optim_params=eps_optimizer_params)
    eps1_env = Environment(eps1_agent, copy.deepcopy(trainloader),
                          '1% Ɛ-Greedy')

    eps0_agent = EpsGrAgent(epsilon=.00, 
                        optimizer_constructor=optimizer_constructor,
                        optim_params=eps_optimizer_params)
    eps0_env = Environment(eps0_agent, copy.deepcopy(trainloader),
                          'Greedy')

    bayes_envs.append(bayes_env)
    eps5_envs.append(eps5_env)
    eps1_envs.append(eps1_env)
    eps0_envs.append(eps0_env)


  bayes_loss = []
  bayes_kl_divergence = []
  bayes_nll = []




  eps5_regret = []
  eps1_regret = []
  eps0_regret = []
  bayes_regret = []


  envs = [(eps5_envs, eps5_regret),
          (eps1_envs, eps1_regret),
          (eps0_envs, eps0_regret),
          (bayes_envs, bayes_regret)]

 

  for step in range(20000):

    logs = False

    if (step+1) % 100 == 0:
      logs = True
      print('Step {}'.format(step))

    for env_set, regret in envs:
      if logs:
        print(env_set[0].name)
      is_bnn = env_set[0].name == 'Bayes by Backprop'
      avg_regret = 0
     
      if is_bnn:
        avg_loss = 0
        avg_kl_divergence = 0
        avg_nll = 0
      for env in env_set:
        if is_bnn:
          loss, kl_divergence, nll = env.play_round(logs=logs)
        else:
          env.play_round(logs=logs)
        avg_regret += env.cumulative_regret
        
        if is_bnn:
          avg_loss += loss
          avg_kl_divergence += kl_divergence
          avg_nll += nll
      avg_regret /= args.number_of_runs
      
      if is_bnn:
        avg_loss /= args.number_of_runs
        avg_kl_divergence /= args.number_of_runs
        avg_nll /= args.number_of_runs 
        bayes_loss.append(avg_loss)
        bayes_kl_divergence.append(avg_kl_divergence)
        bayes_nll.append(avg_nll)
      regret.append(avg_regret)
    
    if (step+1) % 200 == 0:
      
      for env_set, regret in envs:
        plt.plot(np.array(regret), label=env_set[0].name)
      plt.legend()
      plt.title('Cumulative regret of various agents on the mushroom bandit task')
      plt.ylabel('Cumulative Regret')
      plt.xlabel('Steps')
      plt.savefig('reg_{}'.format(step+1), dpi=400, bbox_inches='tight')
      plt.clf()

Step 99
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-1.0763]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-1.1547]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 345
Loss: 1.0218138694763184
chosen action 0 - estimated reward: tensor([[-0.2516]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.6768]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 505
Loss: 2.2900586128234863
chosen action 0 - estimated reward: tensor([[-0.3894]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.4443]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 285
Loss: 0.33119356632232666
chosen action 0 - estimated reward: tensor([[0.0234]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.0148]], grad_fn=<Add

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1199
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.7425]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.5606]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2975


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04215521737933159
chosen action 0 - estimated reward: tensor([[0.2770]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-22.7600]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2935


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04120619222521782
chosen action 0 - estimated reward: tensor([[-0.1554]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.6235]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2585


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03845479339361191
chosen action 0 - estimated reward: tensor([[0.3077]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-2.2978]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2765


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.054684195667505264
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.0826]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.6297]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3030


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.08649840950965881
chosen action 0 - estimated reward: tensor([[-0.0592]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0988]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2930


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06048260256648064
chosen action 0 - estimated reward: tensor([[-0.0137]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-26.0538]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2605


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.021543653681874275
chosen action 0 - estimated reward: tensor([[0.0662]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9145]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2820


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06981024146080017
Greedy
chosen action 0 - estimated reward: tensor([[0.0425]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.1953]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3205


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0008773199515417218
chosen action 0 - estimated reward: tensor([[0.0139]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.8868]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3370
Loss: 0.0036742675583809614
chosen action 0 - estimated reward: tensor([[-0.3119]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-36.9684]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2350


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.061137497425079346
chosen action 0 - estimated reward: tensor([[-0.1925]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-29.1252]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2085


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03968289494514465
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.1671]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1659]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3200
Loss: 58990.3671875
Kullback div: 110.96083068847656
chosen action 0 - estimated reward: tensor([[-0.3400]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.3407]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3670


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 59630.59375
Kullback div: 110.95144653320312
chosen action 0 - estimated reward: tensor([[-0.4630]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.4642]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3755


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 84708.8515625
Kullback div: 111.0245590209961
chosen action 0 - estimated reward: tensor([[-0.4223]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.4263]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3810


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 99994.8046875
Kullback div: 110.9665756225586


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1299
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.4186]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.2206]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3190


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03993000090122223
chosen action 0 - estimated reward: tensor([[0.2569]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[1.4138]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: -35.
Cumulative sum of regret 3145


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.039905380457639694
chosen action 0 - estimated reward: tensor([[0.0063]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.6512]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2700


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.018925219774246216
chosen action 0 - estimated reward: tensor([[0.2581]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.5727]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2900


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.028196945786476135
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.3347]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-17.9243]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3110


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05869780480861664
chosen action 0 - estimated reward: tensor([[0.4541]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-25.6580]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3025


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03600220009684563
chosen action 0 - estimated reward: tensor([[0.1035]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1381]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2665
Loss: 0.0193580761551857
chosen action 0 - estimated reward: tensor([[-0.1622]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9144]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2980


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.033674318343400955
Greedy
chosen action 0 - estimated reward: tensor([[0.0112]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.2037]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3470


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0002379110228503123
chosen action 0 - estimated reward: tensor([[-0.0493]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-4.9210]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3590


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.002630814677104354
chosen action 0 - estimated reward: tensor([[0.7097]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-3.6950]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2470


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06132575869560242
chosen action 0 - estimated reward: tensor([[-0.2029]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-36.7573]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2145


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.028210613876581192
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.1465]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1473]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3470
Loss: 62946.078125
Kullback div: 102.40900421142578
chosen action 0 - estimated reward: tensor([[-0.2778]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2819]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3830


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 56586.68359375
Kullback div: 102.41594696044922
chosen action 0 - estimated reward: tensor([[-0.3779]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.3985]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3940


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 78217.9296875
Kullback div: 102.48478698730469
chosen action 0 - estimated reward: tensor([[-0.2922]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2886]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3965


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 91940.15625
Kullback div: 102.41972351074219


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1399
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.4153]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5702]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3285


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.030092475935816765
chosen action 0 - estimated reward: tensor([[-0.0037]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.6779]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3360


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05836310610175133
chosen action 0 - estimated reward: tensor([[0.0986]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-4.5627]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2820


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.005486879963427782
chosen action 0 - estimated reward: tensor([[-0.6624]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4634]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3030


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.044518131762742996
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1555]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-27.9351]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3165


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03493529558181763
chosen action 0 - estimated reward: tensor([[0.3037]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4720]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3060


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.028418030589818954
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2770


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.01743759587407112
chosen action 0 - estimated reward: tensor([[0.1973]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8352]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3165


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0659957155585289
Greedy
chosen action 0 - estimated reward: tensor([[-0.0077]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-6.2861]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3715


Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.00010791754903038964
chosen action 0 - estimated reward: tensor([[0.1214]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-2.3826]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3805


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.004613596014678478
chosen action 0 - estimated reward: tensor([[-0.3720]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.5878]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2595
Loss: 0.03248089924454689
chosen action 0 - estimated reward: tensor([[-0.3482]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9246]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2190


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError

Loss: 0.02317120134830475
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.1228]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1185]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3625


: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 60335.94921875
Kullback div: 95.09341430664062
chosen action 0 - estimated reward: tensor([[-0.2411]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2491]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4060


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 55472.359375
Kullback div: 95.0939712524414
chosen action 0 - estimated reward: tensor([[-0.2521]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2800]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4110


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 74526.96875
Kullback div: 95.13186645507812
chosen action 0 - estimated reward: tensor([[-0.1801]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2041]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4080


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 86847.921875
Kullback div: 95.12333679199219


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1499
5% Ɛ-Greedy
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3310


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.005670331884175539
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3450


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.02075464464724064
chosen action 0 - estimated reward: tensor([[0.2197]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.3788]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: -35.
Cumulative sum of regret 3140


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.07006882131099701
chosen action 0 - estimated reward: tensor([[0.1451]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1080]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3080


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.02337799035012722
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.5669]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-15.9914]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3240


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.035911791026592255
chosen action 0 - estimated reward: tensor([[-0.1590]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-46.3895]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3140


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.024999340996146202
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2890


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.002859399886801839
chosen action 0 - estimated reward: tensor([[-0.0027]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0206]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3360


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Loss: 0.09283419698476791
Greedy
chosen action 0 - estimated reward: tensor([[-0.0389]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3020]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3965



Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 4.070740033057518e-05
chosen action 0 - estimated reward: tensor([[0.2616]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.8611]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4155


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06123000755906105
chosen action 0 - estimated reward: tensor([[-0.3826]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.5119]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2715


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.05302174389362335
chosen action 0 - estimated reward: tensor([[0.0499]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-32.2925]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2210


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02090354450047016
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.2570]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.2572]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3750


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 56635.98046875
Kullback div: 88.77110290527344
chosen action 0 - estimated reward: tensor([[-0.2294]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2514]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4235


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 52517.91015625
Kullback div: 88.75534057617188
chosen action 0 - estimated reward: tensor([[-0.1420]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.1734]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4245


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 70209.265625
Kullback div: 88.8105697631836
chosen action 0 - estimated reward: tensor([[0.0046]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.0104]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4160


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 82677.953125
Kullback div: 88.76376342773438


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1599
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1476]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[9.7956]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3475


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.009689881466329098
chosen action 0 - estimated reward: tensor([[-0.1239]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.3753]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3580


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02343538962304592
chosen action 0 - estimated reward: tensor([[-0.3201]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-23.5876]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3290


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0751868262887001
chosen action 0 - estimated reward: tensor([[0.1036]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.1116]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3140


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError

Loss: 0.015552828088402748
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.0433]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.8579]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3255


: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02596100978553295
chosen action 0 - estimated reward: tensor([[-0.0149]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.7889]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3220


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError

Loss: 0.03742722421884537
chosen action 0 - estimated reward: tensor([[0.1278]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-17.2104]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2900


: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0009106305078603327
chosen action 0 - estimated reward: tensor([[-0.1397]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-39.5349]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3545


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.09730005264282227
Greedy
chosen action 0 - estimated reward: tensor([[-0.0339]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.2027]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4260


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 2.3986318410607055e-05
chosen action 0 - estimated reward: tensor([[-0.5015]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[0.7256]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4455


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0812285840511322
chosen action 0 - estimated reward: tensor([[-0.9961]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8875]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2830


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04481116309762001
chosen action 0 - estimated reward: tensor([[0.3686]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-40.8355]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2215


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.018629668280482292
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.2843]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.2811]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3885
Loss: 57550.3203125
Kullback div: 83.19953155517578
chosen action 0 - estimated reward: tensor([[-0.0951]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.1252]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4385


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 49798.4140625
Kullback div: 83.1923599243164
chosen action 0 - estimated reward: tensor([[-0.0717]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.1714]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4325


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 66776.1640625
Kullback div: 83.27812194824219
chosen action 0 - estimated reward: tensor([[0.1215]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1269]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4220


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 78726.953125
Kullback div: 83.21774291992188


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1699
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.2068]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.9329]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: -35.
Cumulative sum of regret 3605


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.021565185859799385
chosen action 0 - estimated reward: tensor([[-0.4185]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2132]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3800


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.018438825383782387
chosen action 0 - estimated reward: tensor([[0.0872]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-38.3478]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3305


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:


Loss: 0.06329654157161713
chosen action 0 - estimated reward: tensor([[0.0344]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8491]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3290


BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02480468526482582
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.4334]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.4973]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3345


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.027220746502280235
chosen action 0 - estimated reward: tensor([[0.6916]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1854]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3230


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02821856364607811
chosen action 0 - estimated reward: tensor([[-0.0003]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-23.2785]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2980


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.01172649022191763
chosen action 0 - estimated reward: tensor([[0.0267]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1913]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3845


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.12732870876789093
Greedy
chosen action 0 - estimated reward: tensor([[-0.0206]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.8976]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4515


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 1.9034692741115578e-05
chosen action 0 - estimated reward: tensor([[0.0573]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-9.8887]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4780


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06479969620704651
chosen action 0 - estimated reward: tensor([[0.9173]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.3351]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3020


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03982691466808319
chosen action 0 - estimated reward: tensor([[0.2500]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0863]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2260


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.023735126480460167
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.3390]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3371]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4010


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 57001.41015625
Kullback div: 78.3141098022461
chosen action 0 - estimated reward: tensor([[-0.0035]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.0039]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4495


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 46933.63671875
Kullback div: 78.32353210449219
chosen action 0 - estimated reward: tensor([[0.0301]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.0429]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4475


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 64878.2734375
Kullback div: 78.3738021850586
chosen action 0 - estimated reward: tensor([[0.2391]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.0784]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4275


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 74380.1796875
Kullback div: 78.32013702392578


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1799
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.0515]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-20.4622]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3690


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.007875601761043072
chosen action 0 - estimated reward: tensor([[1.4100]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-36.5970]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3965


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.21366429328918457
chosen action 0 - estimated reward: tensor([[0.4790]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9409]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3410


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05518731847405434
chosen action 0 - estimated reward: tensor([[0.2091]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-31.2968]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3375


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.014784722588956356
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.5644]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-5.1123]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3355


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.015627093613147736
chosen action 0 - estimated reward: tensor([[0.0408]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8786]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3245


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06552799046039581
chosen action 0 - estimated reward: tensor([[2.1221]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.3103]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: -35.
Cumulative sum of regret 3130


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.031578581780195236
chosen action 0 - estimated reward: tensor([[-0.0261]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-53.5207]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3955


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0857740268111229
Greedy
chosen action 0 - estimated reward: tensor([[0.0213]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3328]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4770


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 1.580697971803602e-05
chosen action 0 - estimated reward: tensor([[0.0786]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9071]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4895


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05088554322719574
chosen action 0 - estimated reward: tensor([[0.0064]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-29.3213]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3170


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.026924489066004753
chosen action 0 - estimated reward: tensor([[0.1007]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-18.3623]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2310


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029805941507220268
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.3945]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3856]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4070


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 55356.72265625
Kullback div: 73.98619079589844
chosen action 0 - estimated reward: tensor([[0.0582]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.0614]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4625


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 45538.796875
Kullback div: 73.96295928955078
chosen action 0 - estimated reward: tensor([[0.1598]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1821]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4550


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 62354.95703125
Kullback div: 74.02088165283203
chosen action 0 - estimated reward: tensor([[0.2524]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.2424]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4365


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 70938.28125
Kullback div: 73.98174285888672


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1899
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.1399]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.7979]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3895


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.006283558439463377
chosen action 0 - estimated reward: tensor([[-0.9596]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-4.6047]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4085


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.07839599251747131
chosen action 0 - estimated reward: tensor([[0.1552]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.9172]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3620


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06969857960939407
chosen action 0 - estimated reward: tensor([[0.3419]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.6470]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3600


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.022266177460551262
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.8156]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-34.0864]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3450


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03350759297609329
chosen action 0 - estimated reward: tensor([[0.0559]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1657]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3285


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02497807890176773
chosen action 0 - estimated reward: tensor([[0.0284]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-24.8389]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3240


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03084082342684269
chosen action 0 - estimated reward: tensor([[0.1507]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-36.8349]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3960


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05777893215417862
Greedy
chosen action 0 - estimated reward: tensor([[-0.0197]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.4246]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5055


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 1.3353475878830068e-05
chosen action 0 - estimated reward: tensor([[0.4449]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-6.3550]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5025


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.025361284613609314
chosen action 0 - estimated reward: tensor([[-0.0654]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-19.3127]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3175


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.00992128811776638
chosen action 0 - estimated reward: tensor([[0.0146]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-31.2980]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2455


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03659490495920181
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.5854]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.5892]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4105


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 53324.203125
Kullback div: 70.07524108886719
chosen action 0 - estimated reward: tensor([[0.1732]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.0790]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4675


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 43176.7109375
Kullback div: 70.07303619384766
chosen action 0 - estimated reward: tensor([[0.2241]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.2400]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4610


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 59580.765625
Kullback div: 70.11157989501953
chosen action 0 - estimated reward: tensor([[0.4759]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4425]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4460


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 71805.90625
Kullback div: 70.08512115478516


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 1999
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.4123]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.3023]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3920


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0037713912315666676
chosen action 0 - estimated reward: tensor([[0.5647]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5117]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4170


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.031274035573005676
chosen action 0 - estimated reward: tensor([[-0.2022]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-12.5112]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3710


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03661337494850159
chosen action 0 - estimated reward: tensor([[-0.0901]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9438]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3660


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.011864886619150639
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.3345]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4085]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3460


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.023920532315969467
chosen action 0 - estimated reward: tensor([[0.6332]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9889]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3365


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03428106755018234
chosen action 0 - estimated reward: tensor([[0.2207]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-35.0442]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3315


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03851499408483505
chosen action 0 - estimated reward: tensor([[-0.1667]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-62.7385]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4105


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.048826705664396286
Greedy
chosen action 0 - estimated reward: tensor([[0.0063]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.9910]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5300


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0005747320828959346
chosen action 0 - estimated reward: tensor([[-1.4786]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-30.4923]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5140


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.015184256248176098
chosen action 0 - estimated reward: tensor([[0.0399]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.4046]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3215


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.017188183963298798
chosen action 0 - estimated reward: tensor([[-0.3793]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9553]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2470


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02247542515397072
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.5748]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.5462]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4125


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 50652.94921875
Kullback div: 66.56629943847656
chosen action 0 - estimated reward: tensor([[0.2300]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1196]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4710


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 42492.30859375
Kullback div: 66.56120300292969
chosen action 0 - estimated reward: tensor([[0.3509]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3572]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4645


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 56978.31640625
Kullback div: 66.60185241699219
chosen action 0 - estimated reward: tensor([[0.5001]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.5404]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4525


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 71089.1953125
Kullback div: 66.5785903930664


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2099
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.0107]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2757]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4010


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.004261075984686613
chosen action 0 - estimated reward: tensor([[-0.6692]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8170]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4285


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.031122826039791107
chosen action 0 - estimated reward: tensor([[-0.3136]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0499]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3710


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.017545495182275772
chosen action 0 - estimated reward: tensor([[0.2506]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-22.0274]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3795


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.027323907241225243
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.1948]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.1324]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3470


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.020051121711730957
chosen action 0 - estimated reward: tensor([[-0.2361]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-8.3648]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3445


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.037112146615982056
chosen action 0 - estimated reward: tensor([[0.0783]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0393]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3385


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.033684853464365005
chosen action 0 - estimated reward: tensor([[-0.1299]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9763]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4255


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06387485563755035
Greedy
chosen action 0 - estimated reward: tensor([[0.0058]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-8.8112]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5570


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.000273058918537572
chosen action 0 - estimated reward: tensor([[0.0174]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-57.2084]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5335


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04074949026107788
chosen action 0 - estimated reward: tensor([[-0.7174]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9802]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3260


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.008055848069489002
chosen action 0 - estimated reward: tensor([[-0.1792]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8541]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2585


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02538854256272316
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.7270]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7403]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4225


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 51841.4765625
Kullback div: 63.398929595947266
chosen action 0 - estimated reward: tensor([[0.3413]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3741]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4730


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 41134.5390625
Kullback div: 63.40249252319336
chosen action 0 - estimated reward: tensor([[0.4060]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.0564]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4680


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 55579.80078125
Kullback div: 63.44418716430664
chosen action 0 - estimated reward: tensor([[0.5596]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3614]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4565


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 69575.9453125
Kullback div: 63.41380310058594


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2199
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.0849]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-26.1275]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4020


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0012883070157840848
chosen action 0 - estimated reward: tensor([[-0.4083]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-31.2856]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4400


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.025971654802560806
chosen action 0 - estimated reward: tensor([[0.0581]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-28.5482]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3895


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029965046793222427
chosen action 0 - estimated reward: tensor([[0.1627]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-14.7636]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3880


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.031527552753686905
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1515]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-50.8414]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3495


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.016811689361929893
chosen action 0 - estimated reward: tensor([[-0.0158]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5111]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3490


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029284831136465073
chosen action 0 - estimated reward: tensor([[0.0401]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-27.9457]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3460


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03656790778040886
chosen action 0 - estimated reward: tensor([[0.2699]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-10.9227]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4445


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.08279429376125336
Greedy
chosen action 0 - estimated reward: tensor([[0.0230]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.2262]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5815


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.00011682783951982856
chosen action 0 - estimated reward: tensor([[0.3228]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-15.2184]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5380


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0116208391264081
chosen action 0 - estimated reward: tensor([[-0.1849]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5845]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3310


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.009926709346473217
chosen action 0 - estimated reward: tensor([[-0.0359]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.9877]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2730


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.526863157749176
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.8268]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7560]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4240


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 49606.1015625
Kullback div: 60.52546310424805
chosen action 0 - estimated reward: tensor([[0.4043]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.1580]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4780


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 41336.66796875
Kullback div: 60.51395034790039
chosen action 0 - estimated reward: tensor([[0.4569]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4635]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4715


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 54314.8359375
Kullback div: 60.555362701416016
chosen action 0 - estimated reward: tensor([[0.6036]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.6492]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4625


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 67535.6796875
Kullback div: 60.518733978271484


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2299
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.0007]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0035]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4155


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.003551790025085211
chosen action 0 - estimated reward: tensor([[0.5992]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-29.9810]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4550


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04246993362903595
chosen action 0 - estimated reward: tensor([[0.2060]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-34.5312]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3970


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.030599268153309822
chosen action 0 - estimated reward: tensor([[-0.3345]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.7822]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3995


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02807578630745411
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.2618]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9416]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3535


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.014758504927158356
chosen action 0 - estimated reward: tensor([[-0.2083]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-20.2047]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3605


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02802225947380066
chosen action 0 - estimated reward: tensor([[-0.0703]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-33.2282]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3510


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.024742932990193367
chosen action 0 - estimated reward: tensor([[0.2191]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5681]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4530


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.08855477720499039
Greedy
chosen action 0 - estimated reward: tensor([[-0.0147]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3194]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 6035


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 4.7304511099355295e-05
chosen action 0 - estimated reward: tensor([[-0.0358]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1066]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5485


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.019647788256406784
chosen action 0 - estimated reward: tensor([[-0.7960]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2171]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3460


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0288899764418602
chosen action 0 - estimated reward: tensor([[-2.1689]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[2.2005]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2740


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.34370651841163635
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.8492]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8917]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4265


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 48653.30078125
Kullback div: 57.8894157409668
chosen action 0 - estimated reward: tensor([[0.5405]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.6166]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4860


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 42270.82421875
Kullback div: 57.88471603393555
chosen action 0 - estimated reward: tensor([[0.5619]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4125]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4795


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 55335.953125
Kullback div: 57.929443359375
chosen action 0 - estimated reward: tensor([[0.7099]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7817]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4690


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 66898.8984375
Kullback div: 57.8908576965332


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2399
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.0816]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-62.3536]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4170


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0014318139292299747
chosen action 0 - estimated reward: tensor([[-0.0515]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-35.0113]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4560


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.027773141860961914
chosen action 0 - estimated reward: tensor([[-0.1308]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-44.6230]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3975


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.026221558451652527
chosen action 0 - estimated reward: tensor([[0.8380]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4425]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4115


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029421815648674965
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.4233]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-43.4912]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3540


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.013826621696352959
chosen action 0 - estimated reward: tensor([[0.2370]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0620]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3680


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0271662175655365
chosen action 0 - estimated reward: tensor([[-0.0460]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.3122]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3545


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0229126438498497
chosen action 0 - estimated reward: tensor([[-0.0621]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.1826]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4605


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06334944814443588
Greedy
chosen action 0 - estimated reward: tensor([[-0.0179]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3681]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 6255


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 2.0107772797928192e-05
chosen action 0 - estimated reward: tensor([[0.1157]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-11.2867]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5535


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.009729772806167603
chosen action 0 - estimated reward: tensor([[-0.4770]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8008]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3565


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.026264440268278122
chosen action 0 - estimated reward: tensor([[2.1511]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[6.9998]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2755


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.24575524032115936
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.8955]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.9392]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4280


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 46791.59765625
Kullback div: 55.4709587097168
chosen action 0 - estimated reward: tensor([[0.5522]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.5885]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4945


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 43009.359375
Kullback div: 55.465057373046875
chosen action 0 - estimated reward: tensor([[0.5546]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[-0.1144]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4865


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 53140.09375
Kullback div: 55.50961685180664
chosen action 0 - estimated reward: tensor([[0.7954]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8479]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4700


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 64801.53515625
Kullback div: 55.48535919189453


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2499
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1147]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9873]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4365


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02828003466129303
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4780


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.033529095351696014
chosen action 0 - estimated reward: tensor([[0.1111]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-5.6422]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4115


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04466180130839348
chosen action 0 - estimated reward: tensor([[0.5163]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-74.1059]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4155


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.018929796293377876
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.2597]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-13.9205]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3555


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.012746439315378666
chosen action 0 - estimated reward: tensor([[0.2166]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8482]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3720


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.031506650149822235
chosen action 0 - estimated reward: tensor([[0.1006]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-26.8353]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3665


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029377209022641182
chosen action 0 - estimated reward: tensor([[0.3190]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-19.4776]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4725


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.06974897533655167
Greedy
chosen action 0 - estimated reward: tensor([[-0.0085]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.2844]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 6535


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 1.0446307896927465e-05
chosen action 0 - estimated reward: tensor([[0.0294]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-22.0872]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5650


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.011952482163906097
chosen action 0 - estimated reward: tensor([[-0.4544]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.8206]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3765


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.040484488010406494
chosen action 0 - estimated reward: tensor([[1.9165]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-33.4621]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2765


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.15734240412712097
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[0.9747]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8792]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4290


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 46055.30078125
Kullback div: 53.26860046386719
chosen action 0 - estimated reward: tensor([[0.6396]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4069]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4965


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 42089.83984375
Kullback div: 53.25979232788086
chosen action 0 - estimated reward: tensor([[0.6399]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7039]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4870


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 53116.921875
Kullback div: 53.292850494384766
chosen action 0 - estimated reward: tensor([[0.8473]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7697]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4750


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 62901.65234375
Kullback div: 53.271568298339844


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2599
5% Ɛ-Greedy
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4415


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02471647970378399
chosen action 0 - estimated reward: tensor([[-0.1016]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-37.8507]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4900


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03862916678190231
chosen action 0 - estimated reward: tensor([[0.1206]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.5210]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4190


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.041312843561172485
chosen action 0 - estimated reward: tensor([[0.3151]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0391]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4240


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02663598582148552
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1091]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-37.2879]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3670


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.020341981202363968
chosen action 0 - estimated reward: tensor([[-2.5243]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-74.6153]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3905


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0416937880218029
chosen action 0 - estimated reward: tensor([[-0.3319]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2939]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3775


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.037716932594776154
chosen action 0 - estimated reward: tensor([[-0.1160]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9830]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4730


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.050614506006240845
Greedy
chosen action 0 - estimated reward: tensor([[-0.0055]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3736]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 6770


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 7.466721854143543e-06
chosen action 0 - estimated reward: tensor([[0.0131]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-13.2082]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5730


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.014732003211975098
chosen action 0 - estimated reward: tensor([[-0.0949]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-7.7463]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3940


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.053732629865407944
chosen action 0 - estimated reward: tensor([[0.0185]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-24.4782]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2815


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.08553972840309143
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[1.0574]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.9499]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4370


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 45908.875
Kullback div: 51.21316146850586
chosen action 0 - estimated reward: tensor([[0.6795]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.6046]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5010


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 41844.08984375
Kullback div: 51.199283599853516
chosen action 0 - estimated reward: tensor([[0.7525]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8225]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5020


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 54758.51171875
Kullback div: 51.24468231201172
chosen action 0 - estimated reward: tensor([[0.8613]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4756]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4775


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 61208.78125
Kullback div: 51.22334289550781


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2699
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.2950]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.3306]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4645


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02001108042895794
chosen action 0 - estimated reward: tensor([[-0.0920]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-18.0547]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5010


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.046458229422569275
chosen action 0 - estimated reward: tensor([[0.0729]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.8481]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4305


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.08348575234413147
chosen action 0 - estimated reward: tensor([[0.3027]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1739]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4365


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.037587009370326996
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.2199]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0707]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3745


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.015281549654901028
chosen action 0 - estimated reward: tensor([[-0.1676]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-39.6805]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3980


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03616523742675781
chosen action 0 - estimated reward: tensor([[0.0521]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.1130]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3775


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.029249323531985283
chosen action 0 - estimated reward: tensor([[0.1960]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[2.7763]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4775


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.051536254584789276
Greedy
chosen action 0 - estimated reward: tensor([[-0.0066]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.2864]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 7005


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 6.3479269556410145e-06
chosen action 0 - estimated reward: tensor([[-0.0856]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.7013]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5855


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0221311803907156
chosen action 0 - estimated reward: tensor([[0.5312]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2018]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4090


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04583641514182091
chosen action 0 - estimated reward: tensor([[0.0840]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-22.4433]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2820


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03958861902356148
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[1.0380]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[1.0835]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4375


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 45338.90234375
Kullback div: 49.31521987915039
chosen action 0 - estimated reward: tensor([[0.7703]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8350]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5015


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 40356.73828125
Kullback div: 49.307796478271484
chosen action 0 - estimated reward: tensor([[0.7039]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.3898]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5065


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 54399.4609375
Kullback div: 49.340003967285156
chosen action 0 - estimated reward: tensor([[0.9722]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[1.0139]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4890


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 62783.8203125
Kullback div: 49.31642532348633


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2799
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.1651]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.6395]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4730


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.028121240437030792
chosen action 0 - estimated reward: tensor([[-0.1501]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[4.9917]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5085


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04300512745976448
chosen action 0 - estimated reward: tensor([[0.0472]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-24.4095]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4495


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.036584749817848206
chosen action 0 - estimated reward: tensor([[0.2464]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4554]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4560


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.036958709359169006
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[-0.1161]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.4474]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3795


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.012068377807736397
chosen action 0 - estimated reward: tensor([[0.2429]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.3511]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4090


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0415983572602272
chosen action 0 - estimated reward: tensor([[-0.2322]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-10.4695]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 3885


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03154392167925835
chosen action 0 - estimated reward: tensor([[0.0515]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-36.5328]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4920


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05101393535733223
Greedy
chosen action 0 - estimated reward: tensor([[0.0075]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3574]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 7250


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 5.631734438793501e-06
chosen action 0 - estimated reward: tensor([[-0.1333]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-68.3770]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5905


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.01493879035115242
chosen action 0 - estimated reward: tensor([[0.0213]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-37.1273]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4160


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04970408231019974
chosen action 0 - estimated reward: tensor([[0.0174]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.0254]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 2820


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.028295813128352165
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[1.1565]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[1.2243]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4445


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 45644.88671875
Kullback div: 47.55116653442383
chosen action 0 - estimated reward: tensor([[0.7453]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8060]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5190


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 43899.765625
Kullback div: 47.551170349121094
chosen action 0 - estimated reward: tensor([[0.7186]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.4482]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5065


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 53000.26953125
Kullback div: 47.57625961303711
chosen action 0 - estimated reward: tensor([[0.9638]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8320]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5065


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 64749.52734375
Kullback div: 47.558860778808594


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable

Step 2899
5% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.4074]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[2.3329]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4800


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.0321178138256073
chosen action 0 - estimated reward: tensor([[-1.0174]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[3.8079]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5170


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.052039530128240585
chosen action 0 - estimated reward: tensor([[-0.1040]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-39.2059]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4535


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.039332158863544464
chosen action 0 - estimated reward: tensor([[-0.1789]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-23.5940]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4665


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.03628186881542206
1% Ɛ-Greedy
chosen action 0 - estimated reward: tensor([[0.3921]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2466]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 3835


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.015126936137676239
chosen action 0 - estimated reward: tensor([[0.5181]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-20.4245]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4130


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04003097116947174
chosen action 0 - estimated reward: tensor([[-0.0016]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.8141]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select eating. Received a reward: -35.
Cumulative sum of regret 4000


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.036722224205732346
chosen action 0 - estimated reward: tensor([[-0.0166]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[5.2931]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4965


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.05532651022076607
Greedy
chosen action 0 - estimated reward: tensor([[-0.0023]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-0.3622]], grad_fn=<AddmmBackward>)
Edible mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 7500


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 5.192542175791459e-06
chosen action 0 - estimated reward: tensor([[-0.1674]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-19.6568]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 6065


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.01893959380686283
chosen action 0 - estimated reward: tensor([[0.0306]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-9.8593]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 4210


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.04501748085021973
chosen action 0 - estimated reward: tensor([[0.0635]], grad_fn=<AddmmBackward>)
chosen action 1 - estimated reward: tensor([[-28.9500]], grad_fn=<AddmmBackward>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 2825


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 0.02470226399600506
Bayes by Backprop
chosen action 0 - estimated reward: tensor([[1.1344]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[1.1570]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 4530


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 46261.0546875
Kullback div: 45.91714859008789
chosen action 0 - estimated reward: tensor([[0.8736]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.9466]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5195


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 43166.75
Kullback div: 45.90192794799805
chosen action 0 - estimated reward: tensor([[0.7843]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.8606]], grad_fn=<DivBackward0>)
Edible mushroom. Agent select eating. Received a reward: 5.
Cumulative sum of regret 5105


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 52489.29296875
Kullback div: 45.945621490478516
chosen action 0 - estimated reward: tensor([[1.0250]], grad_fn=<DivBackward0>)
chosen action 1 - estimated reward: tensor([[0.7117]], grad_fn=<DivBackward0>)
Poisonous mushroom. Agent select passing. Received a reward: 0.
Cumulative sum of regret 5120


Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable


Loss: 64230.8046875
Kullback div: 45.921348571777344


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable
Exception ignored when trying to write to the signal wakeup fd:
BlockingIOError: [Errno 11] Resource temporarily unavailable