# ME bias in the single agent setting

## Setup the training

In [1]:
# IMPORTS 

import tensorflow as tf
import numpy as np
import sys
import os
import pickle

# NOTE(review): machine-specific absolute path, presumably the checkout that contains
# RSA_communication_agents.py; adjust it to your own copy of the repository before running.
sys.path.append("/Users/xxAlv1Nxx/Documents/01a Masters/05 CS/CS 428B Probabilistic Models of Cognition – Language/Project/pragmatic_agents_me_bias")
from RSA_communication_agents import RSASpeaker0, RSAListener0, RSASpeaker1, RSAListener1

In [2]:
# GENERATE INPUT MESSAGES 

def generate_messages(size, n, ME=1):
    """ Generates the message input for the listener
        inputs: 
            size - number of data points
            n - number of states and messages in the system
            ME - number of messages that are withheld from training, here 1 or 2.
                 ME=1 never samples the last message (index n-1);
                 ME=2 never samples the first (index 0) nor the last (index n-1).
        outputs: 
            data - float32 array of shape (size, n, n); for every data point the row of
                   the sampled message is all ones, every other row is zero
            labels - one-hot encoding (depth n) of the sampled message index,
                     as returned by tf.one_hot
        raises:
            ValueError - if ME is neither 1 nor 2
    """

    # Fail early with a clear message; previously an unsupported ME surfaced
    # later as an UnboundLocalError on `selection`.
    if ME == 1:
        candidates = np.arange(n - 1)
    elif ME == 2:
        candidates = np.arange(1, n - 1)
    else:
        raise ValueError("ME must be 1 or 2, got " + repr(ME))

    selection = np.random.choice(candidates, size=size)

    # Mark the sampled message row of every data point in one vectorised assignment.
    data = np.zeros((size, n, n), dtype=np.float32)
    data[np.arange(size), selection, :] = 1.
    labels = tf.one_hot(selection, depth=n)

    return data, labels

In [3]:
# GENERATE BILINGUAL INPUT MESSAGES 

def generate_bilingual_messages(size, n, ME=1):
    """ Generates the bilingual message input for the listener
        Every state s has two messages, s and s + n (labels are the message index modulo n).
        inputs: 
            size - number of data points
            n - number of states in the system (n_messages = 2*n)
            ME - number of states that are withheld from training, here 1 or 2; both
                 messages of a withheld state are never sampled.
                 ME=1 withholds the last state (messages n-1 and 2n-1);
                 ME=2 additionally withholds the first state (messages 0 and n).
        outputs: 
            data - float32 array of shape (size, 2*n, n); for every data point the row of
                   the sampled message is all ones, every other row is zero
            labels - one-hot encoding (depth n) of the state the sampled message refers to,
                     as returned by tf.one_hot
        raises:
            ValueError - if ME is neither 1 nor 2
    """

    # Fail early with a clear message; previously an unsupported ME surfaced
    # later as an UnboundLocalError on `selection`.
    if ME == 1:
        withheld = {n - 1, 2 * n - 1}
    elif ME == 2:
        withheld = {0, n - 1, n, 2 * n - 1}
    else:
        raise ValueError("ME must be 1 or 2, got " + repr(ME))

    candidates = [m for m in range(2 * n) if m not in withheld]
    selection = np.random.choice(candidates, size=size)

    # Mark the sampled message row of every data point in one vectorised assignment.
    data = np.zeros((size, 2 * n, n), dtype=np.float32)
    data[np.arange(size), selection, :] = 1.
    # Messages m and m + n refer to the same state, hence the modulo.
    labels = tf.one_hot(tf.math.floormod(selection, n), depth=n)

    return data, labels

In [9]:
# Sanity check: sample 5 bilingual data points for n=3 states (default ME=1).
generate_bilingual_messages(size=5, n=3)

(array([[[0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[1., 1., 1.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.]],
 
        [[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [1., 1., 1.],
         [0., 0., 0.]]], dtype=float32),
 <tf.Tensor: shape=(5, 3), dtype=float32, numpy=
 array([[0., 1., 0.],
        [1., 0., 0.],
        [0., 1., 0.],
        [0., 1., 0.],
        [0., 1., 0.]], dtype=float32)>)

In [4]:
# TRAIN A LISTENER AND SAVE THE PARAMETERS AS WELL AS THE REWARDS AND THE LEXICA

def run_listener(n=3, reasoning=0, n_epochs=50, ME=1, learning_rate=0.001, runs=100):
    """ Trains the listener on a single agent Lewis game as described in Experiment 1.
        inputs: 
            n - number of states; the bilingual setup uses 2*n messages
            reasoning - reasoning level of the listener, 0 for literal, 1 for pragmatic 
            n_epochs - number of training epochs per agent
            ME - value forwarded to generate_bilingual_messages to select what is
                 left out during training, here 1 or 2
            learning_rate - learning rate for the Adam optimizer
            runs - number of independently initialised listeners to train
        side effects:
            Creates 'data/bilingual/L<reasoning>/<n>_states/' if needed, writes
            'param_dict.pickle' there, and for every run saves the per-epoch lexica and
            per-epoch mean rewards as .npy files (agent-wise).
        raises:
            ValueError - if reasoning is not 0 or 1, or ME is not 1 or 2
    """

    # Validate before touching the file system so a typo does not leave a stray
    # directory / param file behind (previously this failed late with a NameError).
    if reasoning not in (0, 1):
        raise ValueError("reasoning must be 0 (literal) or 1 (pragmatic), got " + repr(reasoning))
    if ME not in (1, 2):
        raise ValueError("ME must be 1 or 2, got " + repr(ME))

    # setup the training and save the parameters 

    n_states = n                      # number of states 
    n_messages = 2*n                  # number of messages
    batch_size = 32                   # batch size
    datasize = 1000                   # number of training data points
    batches = datasize // batch_size  # number of full batches per epoch (remainder is dropped)

    init_mean = 0.5                   # mean for initialization of lexicon entries
    init_std = 0.01                   # std for initialization of lexicon entries

    constraint = tf.keras.constraints.NonNeg() # constrains the lexica to have entries >= 0

    filename = 'data/bilingual/L' + str(reasoning) + '/' + str(n) + '_states/'
    os.makedirs(filename, exist_ok=True)

    param_dict = {"n_states": n_states,"n_messages": n_messages, "n_epochs":n_epochs, "batch_size": batch_size,
              "datasize":datasize, "initializer_truncated_normal_mean_std": [init_mean, init_std], 
              "learning_rate":learning_rate, "runs": runs, "constraint":constraint}    
    with open(filename + 'param_dict.pickle', 'wb') as handle:
        pickle.dump(param_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    # file name prefix shared by all runs; the pragmatic listener also records its alpha
    filename_full = filename + 'L' + str(reasoning) +'_'+ str(ME) + 'missing_'
    if reasoning == 1:
        filename_full = filename_full + '5.0alpha_'

    # run the listeners

    for run in range(1,runs+1):

        # create data 
        data, labels = generate_bilingual_messages(datasize, n, ME=ME)
        data = tf.convert_to_tensor(data)
        lexica = []
        all_rewards = []
        mean_reward = float('nan')    # stays NaN only if n_epochs == 0

        # create listener
        lexicon = tf.Variable(tf.initializers.TruncatedNormal(mean=init_mean, stddev=init_std)
                              ([n_states, n_messages]),
                              name="lexicon", 
                              trainable=True, 
                              dtype=tf.float32,
                              constraint=constraint)

        if reasoning == 0: 
            listener = RSAListener0(n_states, n_messages, lexicon)
        else: 
            listener = RSAListener1(n_states, n_messages, lexicon, alpha=5.)

        listener.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss=tf.keras.losses.CategoricalCrossentropy())

        # train
        for i in range(n_epochs):
            average_reward = []

            # reshuffle data and labels with the same permutation every epoch
            shuffle_indices = np.random.permutation(datasize)
            data = tf.gather(data, shuffle_indices)
            labels = tf.gather(labels, shuffle_indices)

            for j in range(batches):
                # Batches are consecutive, non-overlapping windows. The previous slice
                # [j:j + batch_size] advanced by one sample per step, so batches overlapped
                # and most of the data set was never seen within an epoch.
                start = j * batch_size
                data_batch = data[start:start + batch_size]
                labels_batch = labels[start:start + batch_size]

                _, actions = listener.get_states(data_batch)

                # reward per sample: dot product of the label with the chosen action
                rewards = tf.einsum('ij,ij->i', labels_batch, actions)
                average_reward.append(np.mean(rewards))

                # RL: 
                # Note that we implemented REINFORCE with a work-around using categorical crossentropy. 
                # This can be done by setting the labels to the agent's actions, and weighting the loss
                # function by the rewards. 
                listener.train_on_batch(data_batch, actions, sample_weight=rewards)

            mean_reward = np.mean(average_reward)
            all_rewards.append(mean_reward)
            lexica.append(np.copy(listener.lexicon[:]))

        print('run ' + str(run), 'average reward ' +str(ME)+ ' ' + str(mean_reward))

        # save rewards and lexica 
        np.save(filename_full + 'lexicon_run' + str(run), lexica)
        np.save(filename_full + 'rewards_run' + str(run), all_rewards)

## Run the training 

The listeners are trained for different agent types, different numbers of states (e.g. 3 and 10), and different numbers of states withheld from training (1 or 2). 

### Literal listener

In [8]:
# Literal listener (reasoning=0), n=3 states, ME=1.
run_listener(n=3, reasoning=0, n_epochs=50, ME=1, learning_rate=0.001)

2021-10-29 16:44:16.047228: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2021-10-29 16:44:16.872877: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 1.0
run 5 average reward 1 1.0
run 6 average reward 1 1.0
run 7 average reward 1 1.0
run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 1.0
run 15 average reward 1 1.0
run 16 average reward 1 1.0
run 17 average reward 1 1.0
run 18 average reward 1 1.0
run 19 average reward 1 1.0
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 1.0
run 24 average reward 1 1.0
run 25 average reward 1 1.0
run 26 average reward 1 1.0
run 27 average reward 1 1.0
run 28 average reward 1 1.0
run 29 average reward 1 1.0
run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 1.0
run 34 average reward 1 1.0
run 35 average reward 1 1.0
run 36 average reward 1 1.0
r

2021-10-29 16:51:09.992407: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:51:10.018244: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 48 average reward 1 1.0
run 49 average reward 1 1.0
run 50 average reward 1 1.0
run 51 average reward 1 1.0
run 52 average reward 1 1.0
run 53 average reward 1 1.0
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 1.0
run 57 average reward 1 1.0
run 58 average reward 1 1.0
run 59 average reward 1 1.0
run 60 average reward 1 1.0
run 61 average reward 1 1.0
run 62 average reward 1 1.0
run 63 average reward 1 1.0
run 64 average reward 1 1.0
run 65 average reward 1 1.0
run 66 average reward 1 1.0


2021-10-29 16:54:03.118060: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:54:05.271468: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 67 average reward 1 1.0
run 68 average reward 1 1.0
run 69 average reward 1 1.0
run 70 average reward 1 1.0
run 71 average reward 1 1.0
run 72 average reward 1 1.0
run 73 average reward 1 1.0
run 74 average reward 1 1.0
run 75 average reward 1 1.0
run 76 average reward 1 1.0
run 77 average reward 1 1.0
run 78 average reward 1 1.0
run 79 average reward 1 1.0


2021-10-29 16:56:14.242631: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:56:14.353303: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:56:14.437720: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:56:15.011022: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:56:15.572380: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 80 average reward 1 1.0
run 81 average reward 1 1.0
run 82 average reward 1 1.0
run 83 average reward 1 1.0
run 84 average reward 1 1.0
run 85 average reward 1 1.0
run 86 average reward 1 1.0


2021-10-29 16:57:28.046482: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 87 average reward 1 1.0
run 88 average reward 1 1.0
run 89 average reward 1 1.0


2021-10-29 16:58:09.393870: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-10-29 16:58:10.135888: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 90 average reward 1 1.0
run 91 average reward 1 1.0
run 92 average reward 1 1.0
run 93 average reward 1 1.0
run 94 average reward 1 1.0
run 95 average reward 1 1.0
run 96 average reward 1 1.0
run 97 average reward 1 1.0
run 98 average reward 1 1.0
run 99 average reward 1 1.0
run 100 average reward 1 1.0


In [9]:
# Literal listener (reasoning=0), n=3 states, ME=2.
run_listener(n=3, reasoning=0, n_epochs=50, ME=2, learning_rate=0.001)

run 1 average reward 2 1.0
run 2 average reward 2 1.0
run 3 average reward 2 1.0
run 4 average reward 2 1.0
run 5 average reward 2 1.0
run 6 average reward 2 1.0
run 7 average reward 2 1.0
run 8 average reward 2 1.0
run 9 average reward 2 1.0
run 10 average reward 2 1.0
run 11 average reward 2 1.0
run 12 average reward 2 1.0
run 13 average reward 2 1.0
run 14 average reward 2 1.0


2021-11-24 21:57:30.829108: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 21:57:38.850772: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 21:57:41.736935: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 15 average reward 2 1.0
run 16 average reward 2 1.0
run 17 average reward 2 1.0
run 18 average reward 2 1.0
run 19 average reward 2 1.0
run 20 average reward 2 1.0
run 21 average reward 2 1.0
run 22 average reward 2 1.0
run 23 average reward 2 1.0
run 24 average reward 2 1.0
run 25 average reward 2 1.0
run 26 average reward 2 1.0
run 27 average reward 2 1.0
run 28 average reward 2 1.0
run 29 average reward 2 1.0
run 30 average reward 2 1.0
run 31 average reward 2 1.0
run 32 average reward 2 1.0
run 33 average reward 2 1.0
run 34 average reward 2 1.0
run 35 average reward 2 1.0
run 36 average reward 2 1.0
run 37 average reward 2 1.0
run 38 average reward 2 1.0
run 39 average reward 2 1.0
run 40 average reward 2 1.0
run 41 average reward 2 1.0
run 42 average reward 2 1.0
run 43 average reward 2 1.0
run 44 average reward 2 1.0
run 45 average reward 2 1.0


2021-11-24 22:03:56.315813: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 46 average reward 2 1.0
run 47 average reward 2 1.0
run 48 average reward 2 1.0
run 49 average reward 2 1.0
run 50 average reward 2 1.0
run 51 average reward 2 1.0
run 52 average reward 2 1.0
run 53 average reward 2 1.0
run 54 average reward 2 1.0
run 55 average reward 2 1.0
run 56 average reward 2 1.0
run 57 average reward 2 1.0
run 58 average reward 2 1.0
run 59 average reward 2 1.0
run 60 average reward 2 1.0
run 61 average reward 2 1.0
run 62 average reward 2 1.0


2021-11-24 22:06:41.579144: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 63 average reward 2 1.0
run 64 average reward 2 1.0


2021-11-24 22:07:00.956833: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 65 average reward 2 1.0
run 66 average reward 2 1.0
run 67 average reward 2 1.0
run 68 average reward 2 1.0
run 69 average reward 2 1.0
run 70 average reward 2 1.0
run 71 average reward 2 1.0
run 72 average reward 2 1.0
run 73 average reward 2 1.0
run 74 average reward 2 1.0
run 75 average reward 2 1.0
run 76 average reward 2 1.0
run 77 average reward 2 1.0
run 78 average reward 2 1.0
run 79 average reward 2 1.0
run 80 average reward 2 1.0
run 81 average reward 2 1.0
run 82 average reward 2 1.0
run 83 average reward 2 1.0
run 84 average reward 2 1.0
run 85 average reward 2 1.0
run 86 average reward 2 1.0
run 87 average reward 2 1.0
run 88 average reward 2 1.0
run 89 average reward 2 1.0
run 90 average reward 2 1.0
run 91 average reward 2 1.0
run 92 average reward 2 1.0
run 93 average reward 2 1.0
run 94 average reward 2 1.0
run 95 average reward 2 1.0
run 96 average reward 2 1.0
run 97 average reward 2 1.0
run 98 average reward 2 1.0
run 99 average reward 2 1.0
run 100 average rewa

In [6]:
# Literal listener (reasoning=0), n=10 states, ME=1, 100 epochs.
run_listener(n=10, reasoning=0, n_epochs=100, ME=1, learning_rate=0.001)

2021-11-24 15:43:39.438946: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 1.0
run 5 average reward 1 1.0
run 6 average reward 1 1.0


2021-11-24 15:45:41.325105: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 7 average reward 1 1.0
run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 1.0
run 15 average reward 1 1.0
run 16 average reward 1 1.0
run 17 average reward 1 1.0
run 18 average reward 1 1.0
run 19 average reward 1 1.0
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 1.0
run 24 average reward 1 1.0
run 25 average reward 1 1.0
run 26 average reward 1 1.0
run 27 average reward 1 1.0
run 28 average reward 1 1.0
run 29 average reward 1 1.0
run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 1.0
run 34 average reward 1 1.0
run 35 average reward 1 1.0
run 36 average reward 1 1.0
run 37 average reward 1 1.0
run 38 average reward 1 1.0
run 39 average reward 1 1.0
run 40 average reward 1 1.0
run 41 average reward 1 1.0
run 42 average reward 1

2021-11-24 15:59:02.967358: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 53 average reward 1 1.0
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 1.0
run 57 average reward 1 1.0
run 58 average reward 1 1.0
run 59 average reward 1 1.0
run 60 average reward 1 1.0
run 61 average reward 1 1.0
run 62 average reward 1 1.0
run 63 average reward 1 1.0
run 64 average reward 1 1.0
run 65 average reward 1 1.0
run 66 average reward 1 1.0
run 67 average reward 1 1.0
run 68 average reward 1 1.0
run 69 average reward 1 1.0
run 70 average reward 1 1.0
run 71 average reward 1 1.0
run 72 average reward 1 1.0
run 73 average reward 1 1.0
run 74 average reward 1 1.0
run 75 average reward 1 1.0
run 76 average reward 1 1.0
run 77 average reward 1 1.0
run 78 average reward 1 1.0
run 79 average reward 1 1.0
run 80 average reward 1 1.0
run 81 average reward 1 1.0
run 82 average reward 1 1.0
run 83 average reward 1 1.0
run 84 average reward 1 1.0
run 85 average reward 1 1.0
run 86 average reward 1 1.0
run 87 average reward 1 1.0
run 88 average rewar

In [10]:
# Literal listener (reasoning=0), n=10 states, ME=2, 100 epochs.
run_listener(n=10, reasoning=0, n_epochs=100, ME=2, learning_rate=0.001)

run 1 average reward 2 1.0
run 2 average reward 2 1.0
run 3 average reward 2 1.0
run 4 average reward 2 1.0
run 5 average reward 2 1.0
run 6 average reward 2 1.0
run 7 average reward 2 1.0
run 8 average reward 2 1.0
run 9 average reward 2 1.0
run 10 average reward 2 1.0
run 11 average reward 2 1.0
run 12 average reward 2 1.0
run 13 average reward 2 1.0
run 14 average reward 2 1.0
run 15 average reward 2 1.0
run 16 average reward 2 1.0
run 17 average reward 2 1.0
run 18 average reward 2 1.0
run 19 average reward 2 1.0
run 20 average reward 2 1.0
run 21 average reward 2 1.0
run 22 average reward 2 1.0
run 23 average reward 2 1.0
run 24 average reward 2 1.0
run 25 average reward 2 1.0
run 26 average reward 2 1.0
run 27 average reward 2 1.0
run 28 average reward 2 1.0
run 29 average reward 2 1.0
run 30 average reward 2 1.0
run 31 average reward 2 1.0
run 32 average reward 2 1.0
run 33 average reward 2 1.0
run 34 average reward 2 1.0
run 35 average reward 2 1.0
run 36 average reward 2 1.0
r

2021-11-24 22:34:22.588525: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 41 average reward 2 1.0
run 42 average reward 2 1.0
run 43 average reward 2 1.0
run 44 average reward 2 1.0
run 45 average reward 2 1.0
run 46 average reward 2 1.0
run 47 average reward 2 1.0
run 48 average reward 2 1.0
run 49 average reward 2 1.0
run 50 average reward 2 1.0
run 51 average reward 2 1.0
run 52 average reward 2 1.0
run 53 average reward 2 1.0
run 54 average reward 2 1.0


2021-11-24 22:40:22.659102: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 22:40:22.689229: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 22:40:22.723692: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 22:40:22.763462: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 55 average reward 2 1.0
run 56 average reward 2 1.0
run 57 average reward 2 1.0
run 58 average reward 2 1.0
run 59 average reward 2 1.0
run 60 average reward 2 1.0
run 61 average reward 2 1.0
run 62 average reward 2 1.0
run 63 average reward 2 1.0
run 64 average reward 2 1.0
run 65 average reward 2 1.0
run 66 average reward 2 1.0
run 67 average reward 2 1.0
run 68 average reward 2 1.0
run 69 average reward 2 1.0
run 70 average reward 2 1.0
run 71 average reward 2 1.0
run 72 average reward 2 1.0
run 73 average reward 2 1.0
run 74 average reward 2 1.0
run 75 average reward 2 1.0
run 76 average reward 2 1.0
run 77 average reward 2 1.0
run 78 average reward 2 1.0
run 79 average reward 2 1.0
run 80 average reward 2 1.0
run 81 average reward 2 1.0
run 82 average reward 2 1.0
run 83 average reward 2 1.0
run 84 average reward 2 1.0
run 85 average reward 2 1.0
run 86 average reward 2 1.0
run 87 average reward 2 1.0
run 88 average reward 2 1.0
run 89 average reward 2 1.0
run 90 average rewar

### Pragmatic listener

In [7]:
# Pragmatic listener (reasoning=1), n=3 states, ME=1.
run_listener(n=3, reasoning=1, n_epochs=50, ME=1, learning_rate=0.001)

run 1 average reward 1 0.9989919
run 2 average reward 1 0.9979839
run 3 average reward 1 0.9989919
run 4 average reward 1 0.9989919
run 5 average reward 1 0.9989919
run 6 average reward 1 0.9979839
run 7 average reward 1 0.9979839
run 8 average reward 1 1.0
run 9 average reward 1 0.9989919
run 10 average reward 1 0.9979839
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 0.9979839
run 15 average reward 1 0.9989919
run 16 average reward 1 0.9989919
run 17 average reward 1 1.0
run 18 average reward 1 1.0
run 19 average reward 1 0.9979839
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 1.0
run 24 average reward 1 1.0
run 25 average reward 1 0.9989919
run 26 average reward 1 0.9989919
run 27 average reward 1 1.0


2021-11-24 16:16:16.719143: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 28 average reward 1 0.9989919
run 29 average reward 1 0.9979839
run 30 average reward 1 1.0
run 31 average reward 1 0.9979839
run 32 average reward 1 0.9989919
run 33 average reward 1 1.0
run 34 average reward 1 0.9979839
run 35 average reward 1 1.0
run 36 average reward 1 0.9989919
run 37 average reward 1 1.0
run 38 average reward 1 1.0
run 39 average reward 1 1.0
run 40 average reward 1 0.9979839
run 41 average reward 1 1.0
run 42 average reward 1 0.9989919
run 43 average reward 1 0.9989919
run 44 average reward 1 1.0
run 45 average reward 1 1.0
run 46 average reward 1 1.0
run 47 average reward 1 0.9989919
run 48 average reward 1 0.9979839
run 49 average reward 1 0.9979839
run 50 average reward 1 0.9979839
run 51 average reward 1 0.9969758
run 52 average reward 1 1.0
run 53 average reward 1 0.9989919
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 1.0
run 57 average reward 1 1.0
run 58 average reward 1 1.0
run 59 average reward 1 1.0
run 60 average

In [11]:
# Pragmatic listener (reasoning=1), n=3 states, ME=2.
run_listener(n=3, reasoning=1, n_epochs=50, ME=2, learning_rate=0.001)

run 1 average reward 2 1.0
run 2 average reward 2 1.0
run 3 average reward 2 1.0
run 4 average reward 2 1.0
run 5 average reward 2 1.0
run 6 average reward 2 0.9989919
run 7 average reward 2 1.0
run 8 average reward 2 1.0
run 9 average reward 2 1.0
run 10 average reward 2 1.0
run 11 average reward 2 1.0
run 12 average reward 2 1.0
run 13 average reward 2 0.9989919
run 14 average reward 2 0.9989919
run 15 average reward 2 1.0
run 16 average reward 2 1.0
run 17 average reward 2 1.0
run 18 average reward 2 1.0
run 19 average reward 2 0.9989919
run 20 average reward 2 0.9989919
run 21 average reward 2 0.9979839
run 22 average reward 2 1.0
run 23 average reward 2 0.9989919
run 24 average reward 2 0.9989919
run 25 average reward 2 0.9989919
run 26 average reward 2 1.0
run 27 average reward 2 0.9989919


2021-11-24 23:04:19.384613: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 23:04:19.743607: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 28 average reward 2 0.9989919


2021-11-24 23:04:29.458222: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 23:04:30.548491: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 29 average reward 2 1.0
run 30 average reward 2 1.0
run 31 average reward 2 1.0
run 32 average reward 2 1.0
run 33 average reward 2 0.9989919
run 34 average reward 2 0.9989919
run 35 average reward 2 1.0
run 36 average reward 2 0.9989919


2021-11-24 23:06:45.311406: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 37 average reward 2 1.0
run 38 average reward 2 1.0
run 39 average reward 2 1.0
run 40 average reward 2 1.0
run 41 average reward 2 1.0
run 42 average reward 2 1.0
run 43 average reward 2 1.0
run 44 average reward 2 1.0
run 45 average reward 2 1.0
run 46 average reward 2 1.0
run 47 average reward 2 1.0
run 48 average reward 2 1.0
run 49 average reward 2 1.0
run 50 average reward 2 1.0
run 51 average reward 2 1.0
run 52 average reward 2 0.9989919
run 53 average reward 2 1.0
run 54 average reward 2 1.0
run 55 average reward 2 1.0
run 56 average reward 2 1.0
run 57 average reward 2 0.9989919
run 58 average reward 2 1.0
run 59 average reward 2 1.0
run 60 average reward 2 1.0
run 61 average reward 2 1.0
run 62 average reward 2 0.9989919
run 63 average reward 2 1.0
run 64 average reward 2 0.9989919
run 65 average reward 2 0.9989919
run 66 average reward 2 0.9989919
run 67 average reward 2 0.9989919
run 68 average reward 2 1.0
run 69 average reward 2 1.0
run 70 average reward 2 1.0
run 71

2021-11-24 23:17:57.985550: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 91 average reward 2 0.9989919
run 92 average reward 2 1.0
run 93 average reward 2 1.0
run 94 average reward 2 1.0
run 95 average reward 2 1.0
run 96 average reward 2 1.0
run 97 average reward 2 1.0
run 98 average reward 2 1.0
run 99 average reward 2 0.9989919
run 100 average reward 2 1.0


In [14]:
# Pragmatic listener (reasoning=1), n=6 states, ME=1, 100 epochs.
run_listener(n=6, reasoning=1, n_epochs=100, ME=1, learning_rate=0.001)

run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 1.0
run 5 average reward 1 1.0
run 6 average reward 1 1.0
run 7 average reward 1 1.0


2021-11-26 08:54:20.228310: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 0.9989919
run 13 average reward 1 1.0


2021-11-26 08:56:20.234000: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 14 average reward 1 1.0
run 15 average reward 1 1.0
run 16 average reward 1 1.0
run 17 average reward 1 1.0
run 18 average reward 1 1.0
run 19 average reward 1 1.0
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 1.0
run 24 average reward 1 1.0
run 25 average reward 1 1.0
run 26 average reward 1 0.9989919
run 27 average reward 1 0.9989919
run 28 average reward 1 1.0
run 29 average reward 1 1.0
run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 1.0
run 34 average reward 1 1.0
run 35 average reward 1 1.0
run 36 average reward 1 0.9989919
run 37 average reward 1 1.0
run 38 average reward 1 0.9989919
run 39 average reward 1 1.0


2021-11-26 09:07:52.137957: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 40 average reward 1 1.0
run 41 average reward 1 1.0
run 42 average reward 1 1.0
run 43 average reward 1 0.9989919
run 44 average reward 1 1.0
run 45 average reward 1 1.0
run 46 average reward 1 0.9989919
run 47 average reward 1 1.0
run 48 average reward 1 1.0
run 49 average reward 1 0.9989919
run 50 average reward 1 1.0
run 51 average reward 1 1.0
run 52 average reward 1 1.0
run 53 average reward 1 0.9979839
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 0.9979839
run 57 average reward 1 1.0
run 58 average reward 1 1.0
run 59 average reward 1 0.9989919
run 60 average reward 1 1.0
run 61 average reward 1 1.0
run 62 average reward 1 1.0
run 63 average reward 1 0.9989919
run 64 average reward 1 1.0
run 65 average reward 1 1.0
run 66 average reward 1 0.9989919


2021-11-26 09:18:22.655524: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 67 average reward 1 1.0
run 68 average reward 1 0.9989919
run 69 average reward 1 1.0
run 70 average reward 1 1.0
run 71 average reward 1 1.0
run 72 average reward 1 1.0
run 73 average reward 1 1.0
run 74 average reward 1 0.9989919
run 75 average reward 1 1.0
run 76 average reward 1 1.0
run 77 average reward 1 1.0
run 78 average reward 1 1.0
run 79 average reward 1 1.0
run 80 average reward 1 1.0
run 81 average reward 1 1.0
run 82 average reward 1 0.9989919
run 83 average reward 1 1.0
run 84 average reward 1 0.9989919
run 85 average reward 1 1.0
run 86 average reward 1 1.0
run 87 average reward 1 1.0
run 88 average reward 1 1.0
run 89 average reward 1 1.0
run 90 average reward 1 0.9989919
run 91 average reward 1 1.0
run 92 average reward 1 0.9989919
run 93 average reward 1 1.0
run 94 average reward 1 1.0
run 95 average reward 1 1.0
run 96 average reward 1 0.9989919
run 97 average reward 1 1.0
run 98 average reward 1 1.0
run 99 average reward 1 1.0
run 100 average reward 1 1.0


In [15]:
# Pragmatic listener (reasoning=1), n=7 states, ME=1, 100 epochs.
run_listener(n=7, reasoning=1, n_epochs=100, ME=1, learning_rate=0.001)

run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 0.9989919
run 5 average reward 1 0.9979839
run 6 average reward 1 0.9979839
run 7 average reward 1 0.9989919
run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 1.0
run 15 average reward 1 1.0
run 16 average reward 1 1.0
run 17 average reward 1 0.9989919
run 18 average reward 1 1.0
run 19 average reward 1 0.9969758
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 1.0
run 24 average reward 1 0.9989919
run 25 average reward 1 1.0
run 26 average reward 1 1.0
run 27 average reward 1 1.0
run 28 average reward 1 0.9989919
run 29 average reward 1 0.9989919
run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 0.9989919
run 34 average reward 1 1

2021-11-26 09:48:52.455390: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 40 average reward 1 0.9989919
run 41 average reward 1 1.0


2021-11-26 09:49:59.971783: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:50:02.121280: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 42 average reward 1 1.0
run 43 average reward 1 1.0
run 44 average reward 1 1.0
run 45 average reward 1 1.0
run 46 average reward 1 0.9989919


2021-11-26 09:51:59.075061: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 47 average reward 1 0.9989919


2021-11-26 09:52:15.224977: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:52:15.788475: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 48 average reward 1 1.0
run 49 average reward 1 1.0
run 50 average reward 1 1.0
run 51 average reward 1 1.0
run 52 average reward 1 1.0
run 53 average reward 1 1.0
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 1.0
run 57 average reward 1 0.9989919
run 58 average reward 1 1.0
run 59 average reward 1 1.0
run 60 average reward 1 1.0


2021-11-26 09:56:55.901091: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:56:55.973642: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:56:56.062183: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:56:56.135074: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:56:56.173729: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 09:56:56.188894: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 61 average reward 1 0.9969758
run 62 average reward 1 0.9989919
run 63 average reward 1 1.0
run 64 average reward 1 1.0
run 65 average reward 1 1.0
run 66 average reward 1 1.0
run 67 average reward 1 1.0
run 68 average reward 1 1.0
run 69 average reward 1 1.0


2021-11-26 10:00:22.248823: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:00:22.427684: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:00:22.456199: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 70 average reward 1 1.0
run 71 average reward 1 1.0
run 72 average reward 1 0.9989919
run 73 average reward 1 1.0
run 74 average reward 1 1.0
run 75 average reward 1 1.0
run 76 average reward 1 1.0
run 77 average reward 1 1.0
run 78 average reward 1 0.9989919
run 79 average reward 1 1.0
run 80 average reward 1 1.0
run 81 average reward 1 1.0
run 82 average reward 1 0.9989919
run 83 average reward 1 1.0
run 84 average reward 1 1.0
run 85 average reward 1 1.0


2021-11-26 10:06:53.592788: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 86 average reward 1 1.0
run 87 average reward 1 1.0
run 88 average reward 1 1.0
run 89 average reward 1 1.0
run 90 average reward 1 1.0
run 91 average reward 1 0.9989919
run 92 average reward 1 1.0
run 93 average reward 1 1.0
run 94 average reward 1 1.0
run 95 average reward 1 1.0
run 96 average reward 1 1.0
run 97 average reward 1 0.9979839
run 98 average reward 1 0.9989919
run 99 average reward 1 1.0
run 100 average reward 1 0.9979839


In [16]:
# RUN LISTENER: n=8, ME=1
# The recorded output below prints one "run k average reward ..." line per run (k = 1..100).
# NOTE(review): run_listener is defined outside this section; presumably n and ME carry
# the same meaning as in generate_messages (system size / withheld messages) — confirm.
run_listener(
    learning_rate=0.001,
    n=8,
    reasoning=1,
    n_epochs=100,
    ME=1,
)

run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 1.0
run 5 average reward 1 0.9989919
run 6 average reward 1 1.0
run 7 average reward 1 1.0
run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 0.9989919
run 15 average reward 1 1.0
run 16 average reward 1 0.9979839
run 17 average reward 1 1.0
run 18 average reward 1 1.0
run 19 average reward 1 0.9989919
run 20 average reward 1 0.9989919
run 21 average reward 1 0.9989919
run 22 average reward 1 1.0
run 23 average reward 1 0.9979839
run 24 average reward 1 1.0
run 25 average reward 1 1.0
run 26 average reward 1 0.9989919
run 27 average reward 1 1.0
run 28 average reward 1 1.0
run 29 average reward 1 1.0


2021-11-26 10:21:54.925485: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:22:02.091901: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 1.0
run 34 average reward 1 1.0
run 35 average reward 1 1.0
run 36 average reward 1 0.9989919


2021-11-26 10:24:31.050586: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 37 average reward 1 1.0
run 38 average reward 1 1.0
run 39 average reward 1 0.9989919
run 40 average reward 1 1.0
run 41 average reward 1 0.9989919
run 42 average reward 1 1.0
run 43 average reward 1 1.0
run 44 average reward 1 0.9989919
run 45 average reward 1 1.0
run 46 average reward 1 1.0
run 47 average reward 1 1.0
run 48 average reward 1 1.0
run 49 average reward 1 0.9979839
run 50 average reward 1 1.0
run 51 average reward 1 0.9989919
run 52 average reward 1 0.9989919
run 53 average reward 1 1.0
run 54 average reward 1 1.0
run 55 average reward 1 1.0
run 56 average reward 1 1.0
run 57 average reward 1 1.0
run 58 average reward 1 1.0
run 59 average reward 1 1.0
run 60 average reward 1 0.9989919
run 61 average reward 1 0.9989919
run 62 average reward 1 1.0
run 63 average reward 1 0.9989919
run 64 average reward 1 1.0


2021-11-26 10:35:06.844030: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:35:06.877982: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:35:06.956466: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:35:16.980115: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 65 average reward 1 0.9979839
run 66 average reward 1 1.0
run 67 average reward 1 1.0
run 68 average reward 1 0.9989919
run 69 average reward 1 1.0
run 70 average reward 1 1.0


2021-11-26 10:37:47.305480: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:37:47.392657: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 71 average reward 1 1.0
run 72 average reward 1 1.0
run 73 average reward 1 1.0
run 74 average reward 1 1.0
run 75 average reward 1 0.9979839
run 76 average reward 1 1.0
run 77 average reward 1 1.0
run 78 average reward 1 1.0
run 79 average reward 1 0.9989919
run 80 average reward 1 0.9989919
run 81 average reward 1 0.9989919
run 82 average reward 1 0.9989919
run 83 average reward 1 1.0


2021-11-26 10:43:21.133493: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 84 average reward 1 1.0
run 85 average reward 1 1.0
run 86 average reward 1 1.0
run 87 average reward 1 1.0
run 88 average reward 1 1.0
run 89 average reward 1 0.9989919
run 90 average reward 1 1.0
run 91 average reward 1 1.0
run 92 average reward 1 0.9979839
run 93 average reward 1 1.0
run 94 average reward 1 0.9989919


2021-11-26 10:48:28.373263: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:48:43.536913: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 95 average reward 1 1.0
run 96 average reward 1 1.0
run 97 average reward 1 0.9989919
run 98 average reward 1 1.0
run 99 average reward 1 1.0
run 100 average reward 1 1.0


In [17]:
# RUN LISTENER: n=9, ME=1
# The recorded output below ends with the line for run 100.
# NOTE(review): run_listener is defined outside this section; presumably n and ME carry
# the same meaning as in generate_messages (system size / withheld messages) — confirm.
run_listener(
    learning_rate=0.001,
    n=9,
    reasoning=1,
    n_epochs=100,
    ME=1,
)

2021-11-26 10:52:44.078969: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 1 average reward 1 1.0
run 2 average reward 1 1.0


2021-11-26 10:53:50.900607: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 3 average reward 1 1.0
run 4 average reward 1 1.0
run 5 average reward 1 0.9989919


2021-11-26 10:54:59.748163: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:55:01.045025: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 10:55:23.125626: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 6 average reward 1 0.9989919
run 7 average reward 1 1.0
run 8 average reward 1 1.0
run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 0.9989919
run 13 average reward 1 1.0
run 14 average reward 1 1.0
run 15 average reward 1 1.0


2021-11-26 11:01:34.360786: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:01:36.193721: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:01:36.305463: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 16 average reward 1 0.9989919


2021-11-26 11:02:13.631945: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 17 average reward 1 0.9979839
run 18 average reward 1 1.0
run 19 average reward 1 1.0
run 20 average reward 1 0.9989919
run 21 average reward 1 1.0
run 22 average reward 1 0.9989919
run 23 average reward 1 1.0
run 24 average reward 1 0.9989919
run 25 average reward 1 1.0
run 26 average reward 1 1.0
run 27 average reward 1 1.0
run 28 average reward 1 1.0
run 29 average reward 1 0.9989919
run 30 average reward 1 0.9989919
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 0.9979839
run 34 average reward 1 0.9989919
run 35 average reward 1 1.0
run 36 average reward 1 0.9989919
run 37 average reward 1 1.0
run 38 average reward 1 0.9989919
run 39 average reward 1 1.0
run 40 average reward 1 1.0
run 41 average reward 1 1.0
run 42 average reward 1 1.0
run 43 average reward 1 0.9989919
run 44 average reward 1 0.9989919
run 45 average reward 1 0.9989919
run 46 average reward 1 1.0
run 47 average reward 1 1.0
run 48 average reward 1 0.9979839
run 49 average rewar

2021-11-26 11:29:16.589652: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:16.705952: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:17.747941: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:19.748328: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:20.032341: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:20.111873: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:21.191272: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-26 11:29:21.676843: W tensorflow/core/data/root_dataset.cc:16

run 84 average reward 1 1.0
run 85 average reward 1 1.0
run 86 average reward 1 0.9989919
run 87 average reward 1 0.9989919
run 88 average reward 1 1.0
run 89 average reward 1 1.0
run 90 average reward 1 1.0
run 91 average reward 1 1.0
run 92 average reward 1 1.0
run 93 average reward 1 1.0
run 94 average reward 1 1.0
run 95 average reward 1 0.9989919
run 96 average reward 1 1.0
run 97 average reward 1 1.0
run 98 average reward 1 0.9989919
run 99 average reward 1 1.0
run 100 average reward 1 1.0


In [None]:
# RUN LISTENER: n=10, ME=1
# NOTE(review): this cell has no execution count and its recorded output stops partway
# (around run 42) — it may not have run to completion; confirm before relying on it.
# NOTE(review): run_listener is defined outside this section; presumably n and ME carry
# the same meaning as in generate_messages (system size / withheld messages) — confirm.
run_listener(
    learning_rate=0.001,
    n=10,
    reasoning=1,
    n_epochs=100,
    ME=1,
)

2021-11-25 14:40:07.870729: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-25 14:40:07.948317: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-25 14:40:10.484649: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-25 14:40:10.536918: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-25 14:40:10.805334: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 1 average reward 1 1.0
run 2 average reward 1 1.0
run 3 average reward 1 1.0
run 4 average reward 1 0.9989919
run 5 average reward 1 1.0
run 6 average reward 1 1.0
run 7 average reward 1 1.0
run 8 average reward 1 1.0


2021-11-25 14:44:34.206351: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 9 average reward 1 1.0
run 10 average reward 1 1.0
run 11 average reward 1 1.0
run 12 average reward 1 1.0
run 13 average reward 1 1.0
run 14 average reward 1 1.0
run 15 average reward 1 0.9989919
run 16 average reward 1 0.9989919
run 17 average reward 1 1.0
run 18 average reward 1 0.9989919
run 19 average reward 1 1.0
run 20 average reward 1 1.0
run 21 average reward 1 1.0
run 22 average reward 1 1.0
run 23 average reward 1 0.9989919
run 24 average reward 1 0.9989919
run 25 average reward 1 0.9979839
run 26 average reward 1 0.9989919
run 27 average reward 1 1.0
run 28 average reward 1 1.0
run 29 average reward 1 1.0
run 30 average reward 1 1.0
run 31 average reward 1 1.0
run 32 average reward 1 1.0
run 33 average reward 1 1.0
run 34 average reward 1 1.0
run 35 average reward 1 1.0
run 36 average reward 1 0.9989919
run 37 average reward 1 1.0
run 38 average reward 1 1.0
run 39 average reward 1 1.0
run 40 average reward 1 0.9979839
run 41 average reward 1 1.0
run 42 average reward 1

In [12]:
# RUN LISTENER: n=10, ME=2
# The recorded output below prints "average reward 2 ..." lines, whereas the ME=1 cells
# print "average reward 1 ..." — the printed integer appears to track the ME argument.
# NOTE(review): this cell's execution count (12) is lower than that of the cells above it,
# so the notebook was not executed top to bottom; re-run from a fresh kernel to confirm.
run_listener(
    learning_rate=0.001,
    n=10,
    reasoning=1,
    n_epochs=100,
    ME=2,
)

run 1 average reward 2 1.0
run 2 average reward 2 1.0
run 3 average reward 2 1.0
run 4 average reward 2 1.0
run 5 average reward 2 0.9989919
run 6 average reward 2 1.0
run 7 average reward 2 1.0
run 8 average reward 2 0.9989919
run 9 average reward 2 1.0
run 10 average reward 2 1.0
run 11 average reward 2 0.9989919
run 12 average reward 2 1.0
run 13 average reward 2 1.0
run 14 average reward 2 1.0
run 15 average reward 2 1.0
run 16 average reward 2 1.0
run 17 average reward 2 1.0
run 18 average reward 2 0.9979839
run 19 average reward 2 0.9989919
run 20 average reward 2 0.9989919
run 21 average reward 2 1.0
run 22 average reward 2 1.0
run 23 average reward 2 0.9989919
run 24 average reward 2 1.0
run 25 average reward 2 1.0
run 26 average reward 2 1.0
run 27 average reward 2 0.9979839
run 28 average reward 2 1.0
run 29 average reward 2 1.0
run 30 average reward 2 1.0
run 31 average reward 2 0.9989919
run 32 average reward 2 0.9989919
run 33 average reward 2 1.0
run 34 average reward 2 0

2021-11-24 23:37:03.786365: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled
2021-11-24 23:37:13.840176: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 43 average reward 2 1.0


2021-11-24 23:37:33.187772: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 44 average reward 2 1.0
run 45 average reward 2 0.9989919
run 46 average reward 2 1.0


2021-11-24 23:39:18.236749: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 47 average reward 2 0.9989919
run 48 average reward 2 0.9989919
run 49 average reward 2 1.0
run 50 average reward 2 1.0
run 51 average reward 2 1.0
run 52 average reward 2 0.9989919
run 53 average reward 2 1.0
run 54 average reward 2 1.0
run 55 average reward 2 1.0
run 56 average reward 2 1.0
run 57 average reward 2 1.0
run 58 average reward 2 0.9989919
run 59 average reward 2 1.0
run 60 average reward 2 1.0
run 61 average reward 2 1.0
run 62 average reward 2 1.0
run 63 average reward 2 0.9989919
run 64 average reward 2 1.0
run 65 average reward 2 1.0
run 66 average reward 2 1.0
run 67 average reward 2 1.0
run 68 average reward 2 1.0
run 69 average reward 2 0.9989919
run 70 average reward 2 1.0
run 71 average reward 2 1.0
run 72 average reward 2 1.0
run 73 average reward 2 0.9989919
run 74 average reward 2 1.0
run 75 average reward 2 1.0
run 76 average reward 2 1.0
run 77 average reward 2 1.0
run 78 average reward 2 1.0
run 79 average reward 2 1.0
run 80 average reward 2 1.0
run 81

2021-11-24 23:55:07.325306: W tensorflow/core/data/root_dataset.cc:167] Optimization loop failed: Cancelled: Operation was cancelled


run 91 average reward 2 1.0
run 92 average reward 2 1.0
run 93 average reward 2 1.0
run 94 average reward 2 1.0
run 95 average reward 2 1.0
run 96 average reward 2 1.0
run 97 average reward 2 1.0
run 98 average reward 2 1.0
run 99 average reward 2 1.0
run 100 average reward 2 1.0
