# Imports and Initialise Env

In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
import numpy as np
from mlagents_envs.environment import ActionTuple

In [2]:
# Executables of the two Unity builds used by this notebook (Windows, local paths).
file_name1 = "C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S2 180723\\build"
file_name2 = "C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S2_test 180723\\build"

# First environment instance. Each instance gets its own worker_id so that both
# can run side by side; graphics stay enabled (no_graphics=False).
env1 = UE(
    file_name=file_name1,
    seed=1,
    side_channels=[],
    worker_id=1,
    no_graphics=False,
)
env1.reset()

# Second environment instance, same settings apart from the build and worker_id.
env2 = UE(
    file_name=file_name2,
    seed=1,
    side_channels=[],
    worker_id=2,
    no_graphics=False,
)
env2.reset()

In [7]:
# env1.close()
# env2.close()

#  Model

In [4]:
# model
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- Language pathway ---
num_words = 35  # Number of unique words in the vocabulary (length of the language input vector)
embedding_dim = 128  # output width of the language embedding layer
language_output_dim = 128  # language feature width fed into the mixing layer

# --- Vision pathway ---
vision_output_dim = 3136  # flattened CNN feature size (64 * 7 * 7)

# --- Fusion, memory and output heads ---
mixing_dim = 256  # width of the fused vision + language representation
lstm_hidden_dim = 256  # size of the LSTM hidden and cell states
num_actions = 4  # number of outputs of the policy head

# Vision encoder: (3,128,128) image --> (64,7,7) feature map --> 3136 flat features
class VisualModule(nn.Module):
    """Three-layer convolutional encoder that flattens its output feature map."""

    def __init__(self):
        super().__init__()
        # Spatial size per stage for a 128x128 input: 128 -> 64 -> 22 -> 7.
        stages = [
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=3, padding=0),
            nn.ReLU(),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, vt):
        """Encode a batch of images `vt` (batch, 3, H, W) into flat feature vectors.

        Every size-1 dimension is squeezed away, so a single-image batch comes
        back as a 1-D vector rather than a (1, features) matrix.
        """
        batch_size = vt.size(0)
        feature_map = self.conv(vt)
        flat_features = feature_map.view(batch_size, -1)
        return flat_features.squeeze()

# Language encoder: word-indicator vector (e.g. [0 0 1 0 0]) --> dense embedding (single linear layer)
# Vocabulary size: S1:5 S2:5 S3:11 S4:9 --> 30 + 5 (noun) = 35 in total
class LanguageModule(nn.Module):
    """Projects a word-indicator vector of length `num_words` to `embedding_dim` features."""

    def __init__(self, num_words, embedding_dim):
        super().__init__()
        self.embedding = nn.Linear(in_features=num_words, out_features=embedding_dim)

    def forward(self, lt):
        """Return the linear embedding of the language vector `lt`."""
        return self.embedding(lt)

# Fusion layer: 3136 (vision) + 128 (language) features --> 256-dimensional mixed embedding (single linear layer)
class MixingModule(nn.Module):
    """Concatenates vision and language features and projects them to `mixing_dim`."""

    def __init__(self, vision_output_dim, language_output_dim, mixing_dim):
        super().__init__()
        fused_width = vision_output_dim + language_output_dim
        self.linear = nn.Linear(fused_width, mixing_dim)

    def forward(self, vision_output, language_output):
        """Join both feature vectors along dim 0 and apply the linear projection.

        The concatenation is along the first dimension, so both inputs are
        expected to be un-batched 1-D vectors.
        """
        fused_input = torch.cat([vision_output, language_output], dim=0)
        return self.linear(fused_input)

class LSTMModule(nn.Module):
    """Single LSTM cell that carries the agent's memory from one step to the next."""

    def __init__(self, mixing_dim, lstm_hidden_dim):
        super().__init__()
        self.lstm = nn.LSTMCell(input_size=mixing_dim, hidden_size=lstm_hidden_dim)

    def forward(self, mixed_output, lstm_hidden_state):
        """Advance the cell by one step.

        Returns the new `(hidden_state, cell_state)` pair produced by the
        LSTM cell; callers read the hidden state from index 0.
        """
        next_state = self.lstm(mixed_output, lstm_hidden_state)
        return next_state

class Agent(nn.Module):
    """Actor-critic agent combining vision, language and recurrent memory.

    An image and a language vector are encoded separately, fused by the mixing
    layer and passed through an LSTM cell. Two linear heads read the LSTM
    hidden state: one produces action scores, the other a state-value estimate.
    """

    def __init__(self, num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions):
        super(Agent, self).__init__()
        self.language_module = LanguageModule(num_words, embedding_dim)
        self.visual_module = VisualModule()
        self.mixing_module = MixingModule(vision_output_dim, language_output_dim, mixing_dim)
        self.lstm_module = LSTMModule(mixing_dim, lstm_hidden_dim)
        self.action_predictor = nn.Linear(lstm_hidden_dim, num_actions)
        self.value_estimator = nn.Linear(lstm_hidden_dim, 1)

    def forward(self, vt, lt, lstm_hidden_state):
        """Run one step of the agent.

        Args:
            vt: image tensor for a single observation (batch of one).
            lt: language vector of length `num_words`.
            lstm_hidden_state: `(hidden_state, cell_state)` pair from the
                previous step.

        Returns:
            A tuple `(action_scores, value_estimate, lstm_state)`:
            `action_scores` are the raw outputs of the policy head (no softmax
            is applied here, so callers must normalise them themselves),
            `value_estimate` is the critic output, and `lstm_state` is the new
            `(hidden_state, cell_state)` pair to feed into the next call.
        """
        vision_output = self.visual_module(vt)
        language_output = self.language_module(lt)
        # The mixing layer works on un-batched vectors; add the batch dimension
        # the LSTM cell expects.
        mixed_output = self.mixing_module(vision_output, language_output).unsqueeze(0)
        lstm_state = self.lstm_module(mixed_output, lstm_hidden_state)
        hidden = lstm_state[0]
        action_scores = self.action_predictor(hidden)
        value_estimate = self.value_estimator(hidden)
        return action_scores, value_estimate, lstm_state

    def save(self, episode, ALG_NAME, ENV_ID):
        """Write the model weights to `model/<ALG_NAME>_<ENV_ID>/agent_<episode>.pt`."""
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(path, exist_ok=True)
        torch.save(self.state_dict(), os.path.join(path, f'agent_{episode}.pt'))

    def load(self, episode, ALG_NAME, ENV_ID, map_location='cpu'):
        """Load weights from `model/<ALG_NAME>_<ENV_ID>/agent_<episode>.pt`.

        Args:
            episode: episode number embedded in the checkpoint file name.
            ALG_NAME: algorithm name part of the checkpoint directory.
            ENV_ID: environment id part of the checkpoint directory.
            map_location: passed to `torch.load`. The default of `'cpu'` lets a
                checkpoint saved from a GPU be restored on a machine without
                CUDA; `load_state_dict` then copies the values into the
                parameters on whatever device the module currently lives on.
        """
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        checkpoint = torch.load(os.path.join(path, f'agent_{episode}.pt'), map_location=map_location)
        self.load_state_dict(checkpoint)
        
#     def load(self,path):
#         self.load_state_dict(torch.load(path))

# Train

1: with skip steps: set speed=1, lr = 3.5e-5  #LR
2: without skip steps: set speed=3, lr = 3.5e-5  #LR
3: lr = 5e-5  #LR
4: lr = 10e-5  #LR

Increasing the learning rate (LR) makes the agent learn faster, but training may collapse if the LR is too high.

reward structure

hit target: +10
hit wall: -1
hit wrong target: -3
max steps reached: -10

s1a 100,000 20 hrs

In [5]:
# 
import argparse
import time
import json
# import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
# add arguments in command --train/test
# parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
# parser.add_argument('--train', dest='train', action='store_true', default=False)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
# args = parser.parse_args()

# Run mode and compute device.
train = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Checkpoint to warm-start from: the best S0 model.
S0_ALG_NAME, S0_ENV_ID, S0_episode = 'S0', '3', 7593

# Identifiers used when naming this run's checkpoints and result file.
ALG_NAME, ENV_ID = 'S2', '8'

# Episode budget.
TRAIN_EPISODES = 200000  # total number of training episodes
MAX_STEPS = 500  # an episode is cut off after this many steps
max_step_reward = -10  # reward assigned when the MAX_STEPS cut-off is hit
env_per_iteration = 100  # consecutive episodes run on one environment before switching

# Optimisation.
lr = 3.5e-5  #LR
LAM = 0.95  # discount factor applied when accumulating rewards into returns
num_steps = 250  # number of environment steps collected between network updates

# Multiplier applied to the one-hot action sent to the environment.
speed = 1

if __name__ == '__main__':
    agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
    agent.load(S0_episode,S0_ALG_NAME,S0_ENV_ID)
    agent.to(device)
    optimizer = optim.RMSprop(agent.parameters(), lr=lr)
    best_score = float('-inf') #this is -infinity
    
    object_hashmap = {
    0: 'capsule',
    1: 'cube',
    2: 'cylinder',
    3: 'prism',
    4: 'sphere'}
    colour_hashmap = {
    0: 'red',
    1: 'green',
    2: 'blue',
    3: 'yellow',
    4: 'black'}
    
    if train:
        entropy_term = 0
        all_episode_reward = []
        all_average_reward = []
        all_steps = []
        all_actor_loss = []
        all_critic_loss = []
        all_entropy_loss = []
        all_total_loss = []
        tracked_agent = -1
        
        for episode in range(TRAIN_EPISODES):
            t0 = time.time()
            episode_reward = 0
                        
            env_index = (episode // env_per_iteration) % 2
            if env_index == 0: env = env1
            else: env = env2
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            STEPS = 0
            
            decision_steps, terminal_steps = env.get_steps(behavior_name)
            # state -- vt, lt, lstm
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device) #vector image
            object_index = int(decision_steps.obs[1][0][0])
            colour_index = int(decision_steps.obs[1][0][1])
            print(f'--- target colour: {colour_hashmap[colour_index]}, target object: {object_hashmap[object_index]} ---')
            # objects: 0-capsule,1-cube,2-cylinder,3-prism,4-sphere 
            # colours: 0-red,1-green,2-blue,3-yellow,4-black 
            
#             lt = torch.eye(num_words)[:, index].to(device) #one hot encoder language vector
            lt = torch.zeros(35).to(device)
            lt[object_index],lt[colour_index+5] = 1,1 #one hot encoder language vector
            
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            
            done = False
            while True: #training loop
                
                # Need to use when calculating the loss
                log_probs = []
                values = torch.empty(0).to(device)
                rewards = []

                for steps in range(num_steps):
                    if STEPS % 5 == 0: #this is the skip steps
                        lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                        policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                    STEPS += 1
#                     lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
#                     policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
#                     STEPS += 1
                    dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy() #use softmax to get prob dist

                    action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1)) #put prob dist into Categorical class to sample
                    action = action_dist.sample() # sample an action from action_dist
                    action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
                    
                    log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # entropy = -np.sum(np.mean(dist)* np.log(dist))
                    entropy = F.cross_entropy(policy_dist.detach(), action)

                    discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                    action_tuple = ActionTuple()
                    action_tuple.add_discrete(discrete_actions)
                    env.set_actions(behavior_name,action_tuple)
                    
                    env.step()
                    decision_steps, terminal_steps = env.get_steps(behavior_name)

                    if tracked_agent == -1 and len(decision_steps) >= 1: #means agent still req action - eps not ended yet
                        tracked_agent = decision_steps.agent_id[0]

                    if tracked_agent in terminal_steps: # roll over or hit the target
                        print('Agent in terminal steps')
                        done = True
                        reward = terminal_steps[tracked_agent].reward
                        if reward > 0:
                            pass
                        else: reward = -1 # roll over or other unseen conditions
                        print(f'Terminal Step reward: {reward}')

                    elif tracked_agent in decision_steps: # the agent which requires action
                        reward = decision_steps[tracked_agent].reward
                        if reward<0:
                            if reward==-2.5:
                                reward=-3
                                print(f'Decision Step reward: {reward}')
                            # if reward<-1: hit = 1
                            
                    if STEPS >= MAX_STEPS:
                        reward = max_step_reward
                        print(f'Max Step Reward: {reward}')
                        env.reset()
                        done = True
                    if STEPS % num_steps == 0:
                        print (f'Step: {STEPS}')

                    episode_reward = episode_reward + reward

                    rewards.append(reward)
                    # values.append(value)
                    values = torch.cat((values, value), dim=0)
                    log_probs.append(log_prob)
                    entropy_term = entropy_term + entropy
                    vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                    vt = vt_new

                    if done or steps == num_steps-1:
                        break
                
                
                discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
                cumulative = 0
                # print(len(rewards))
                for t in reversed(range(len(rewards))):
                    cumulative = rewards[t] + LAM * cumulative # Monte Carlo
                    discounted_rewards[t] = cumulative
                # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
                # Advantage Actor Critic

                # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
                # for t in range(len(rewards)-1):
                #         Qvals[t] = rewards[t] + LAM * values[t+1]
                
                # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
                # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
                #     = Q_t - V_t
                
                # Monte Carlo Advantage = reward + LAM * cumulative_reward
                # Actor_loss = -log(pai(s_t|a_t))*A_t
                # Critic_loss = A_t.pow(2) *0.5
                # Entropy_loss = -F.entropy(pai(St),index) * 0.001

                # entropy = -np.sum(np.mean(dist) * np.log(dist))
                
                #update actor critic
                
                # values = torch.FloatTensor(values).requires_grad_(True).to(device)
                discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
                log_probs = torch.stack(log_probs)
                advantage = discounted_rewards - values
                actor_loss = (-log_probs * advantage).mean()
                critic_loss = 0.5 * torch.square(advantage).mean()
                entropy_term /= num_steps
                entropy_loss = -0.1 * entropy_term
                ac_loss = actor_loss + critic_loss + entropy_loss
                # ac_loss = values.mean()
                optimizer.zero_grad()
                ac_loss.backward()
                optimizer.step()
                # print('updated')
                # for name, param in agent.named_parameters():
                #     if param.grad is not None:
                #         print(name, param.grad)
                #     else:
                #         print(name, "gradients not computed")
                # for name, param in agent.named_parameters():
                #     if name == 'value_estimator.weight':
                #         print(name, param)
            
                if done: break


            all_episode_reward.append(float(episode_reward))
            all_steps.append(STEPS)
            all_actor_loss.append(float(actor_loss))
            all_critic_loss.append(float(critic_loss))
            all_entropy_loss.append(float(entropy_loss))
            all_total_loss.append(float(ac_loss))
            if episode >= 200:
                avg_score = np.mean(all_episode_reward[-200:])
                all_average_reward.append(avg_score)
                if avg_score > best_score:
                    best_score = avg_score
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print(f'-----The best score for averaging previous 200 episode reward is {best_score}. Model has been saved-----')
                print('Training  | Episode: {}/{}  | Episode Reward: {:.1f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
            else:  print('Training  | Episode: {}/{}  | Episode Reward: {:.1f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
            if episode%5000 == 0:
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print("Model has been saved")
        print(all_average_reward)
        agent.save(episode ,ALG_NAME, ENV_ID)
        print("Model has been saved")

        data = {
                    'all_average_reward': all_average_reward,
                    'all_episode_reward': all_episode_reward,
                    'all_actor_loss': all_actor_loss,
                    'all_critic_loss': all_critic_loss,
                    'all_entropy_loss': all_entropy_loss,
                    'all_total_loss': all_total_loss,
                    'all_steps': all_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}.txt'
        with open(file_path, 'w') as file:
            json.dump(data, file)

cuda
--- target colour: blue, target object: prism ---




Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1/200000  | Episode Reward: 7.0  | Actor loss: -1.28 | Critic loss: 18.99 | Entropy loss: -0.0115  | Total Loss: 17.70 | Total Steps: 117
Model has been saved
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2/200000  | Episode Reward: 1.0  | Actor loss: -1.23 | Critic loss: 27.05 | Entropy loss: -0.0116  | Total Loss: 25.81 | Total Steps: 161
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3/200000  | Episode Reward: -2.0  | Actor loss: -2.07 | Critic loss: 28.74 | Entropy loss: -0.0111  | Total Loss: 26.67 | Total Steps: 87
--- target colour: blue, target object: cube --

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 28/200000  | Episode Reward: 4.0  | Actor loss: -0.38 | Critic loss: 3.36 | Entropy loss: -0.0190  | Total Loss: 2.96 | Total Steps: 99
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 29/200000  | Episode Reward: 7.0  | Actor loss: -0.39 | Critic loss: 3.09 | Entropy loss: -0.0199  | Total Loss: 2.68 | Total Steps: 125
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 30/200000  | Episode Reward: 4.0  | Actor loss: -0.64 | Critic loss: 5.08 | Entropy loss: -0.0352  | Total Loss: 4.41 | Total Steps: 182
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 55/200000  | Episode Reward: -8.0  | Actor loss: -1.36 | Critic loss: 13.52 | Entropy loss: -0.0227  | Total Loss: 12.14 | Total Steps: 135
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 56/200000  | Episode Reward: 7.0  | Actor loss: -0.35 | Critic loss: 3.42 | Entropy loss: -0.0258  | Total Loss: 3.05 | Total Steps: 147
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 57/200000  | Episode Reward: 4.0  | Actor loss: -0.29 | Critic loss: 5.32 | Entropy loss: -0.0097  | Total Loss: 5.02 | Total Steps: 128
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 58/200000  | Episo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 82/200000  | Episode Reward: 10.0  | Actor loss: 0.54 | Critic loss: 6.85 | Entropy loss: -0.0017  | Total Loss: 7.39 | Total Steps: 21
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 83/200000  | Episode Reward: -2.0  | Actor loss: -1.00 | Critic loss: 6.59 | Entropy loss: -0.0176  | Total Loss: 5.57 | Total Steps: 124
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 84/200000  | Episode Reward: 1.0  | Actor loss: -0.74 | Critic loss: 6.10 | Entropy loss: -0.0323  | Total Loss: 5.33 | Total Steps: 203
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 108/200000  | Episode Reward: -8.0  | Actor loss: -1.17 | Critic loss: 8.01 | Entropy loss: -0.0278  | Total Loss: 6.81 | Total Steps: 171
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 109/200000  | Episode Reward: -5.0  | Actor loss: -0.63 | Critic loss: 6.62 | Entropy loss: -0.0295  | Total Loss: 5.95 | Total Steps: 189
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 134/200000  | Episode Reward: -5.0  | Actor loss: -0.99 | Critic loss: 9.19 | Entropy loss: -0.0297  | Total Loss: 8.17 | Total Steps: 149
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 135/200000  | Episode Reward: 7.0  | Actor loss: -0.14 | Critic loss: 2.88 | Entropy loss: -0.0136  | Total Loss: 2.73 | Total Steps: 119
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 136/200000  | Episode Reward: 10.0  | Actor loss: 1.30 | Critic loss: 12.02 | Entropy loss: -0.0046  | Total Loss: 13.31 | Total Steps: 25
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Ep

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 160/200000  | Episode Reward: 10.0  | Actor loss: 0.51 | Critic loss: 2.98 | Entropy loss: -0.0127  | Total Loss: 3.48 | Total Steps: 82
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 161/200000  | Episode Reward: 10.0  | Actor loss: 0.88 | Critic loss: 11.18 | Entropy loss: -0.0016  | Total Loss: 12.06 | Total Steps: 17
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 162/200000  | Episode Reward: 10.0  | Actor loss: -0.01 | Critic loss: 3.25 | Entropy loss: -0.0104  | Total Loss: 3.22 | Total Steps: 83
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 163/200000  | Episode Reward: -2.0  | Actor 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 187/200000  | Episode Reward: 1.0  | Actor loss: -0.61 | Critic loss: 6.00 | Entropy loss: -0.0217  | Total Loss: 5.36 | Total Steps: 185
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 188/200000  | Episode Reward: 10.0  | Actor loss: 0.57 | Critic loss: 7.12 | Entropy loss: -0.0014  | Total Loss: 7.68 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 189/200000  | Episode Reward: 4.0  | Actor loss: -0.69 | Critic loss: 5.53 | Entropy loss: -0.0212  | Total Loss: 4.82 | Total Steps: 97
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 190/200000  | Ep

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 212/200000  | Episode Reward: 4.0  | Average Reward 3.78  | Actor loss: -0.58 | Critic loss: 5.30 | Entropy loss: -0.0288  | Total Loss: 4.68 | Total Steps: 189
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 213/200000  | Episode Reward: 4.0  | Average Reward 3.79  | Actor loss: -0.47 | Critic loss: 3.23 | Entropy loss: -0.0329  | Total Loss: 2.73 | Total Steps: 202
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 214/200000  | Episode Reward: 7.0  | Average Reward 3.78  | Actor loss: -0.55 | Critic loss: 3.80 | Entropy loss: -0.0343  | Total Loss: 3.21 | Total Steps: 177
--- target colour: red, target object: sphere ---
Agent in terminal steps
T

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 236/200000  | Episode Reward: -2.0  | Average Reward 3.96  | Actor loss: -0.84 | Critic loss: 7.74 | Entropy loss: -0.0217  | Total Loss: 6.88 | Total Steps: 133
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 237/200000  | Episode Reward: 0.0  | Average Reward 3.92  | Actor loss: -0.65 | Critic loss: 6.82 | Entropy loss: -0.0179  | Total Loss: 6.15 | Total Steps: 168
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 238/200000  | Episode Reward: -2.0  | Average Reward 3.87  | Actor loss: -0.73 | Critic loss: 8.91 | Entropy loss: -0.0186  | Total Loss: 8.16 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 261/200000  | Episode Reward: 10.0  | Average Reward 3.73  | Actor loss: 1.02 | Critic loss: 8.71 | Entropy loss: -0.0019  | Total Loss: 9.73 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 262/200000  | Episode Reward: 7.0  | Average Reward 3.80  | Actor loss: -0.31 | Critic loss: 3.44 | Entropy loss: -0.0227  | Total Loss: 3.11 | Total Steps: 131
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Step: 250
Max Step Reward: -10
Step: 500
Training  | Episode: 263/200000  | Episode Reward: -13.0  | Average Reward 3.73  | Actor loss: -1.04 | Critic loss: 6.93 | Entropy loss: -0.0330  | Total Loss: 5.86 | Total Steps: 500
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Tra

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 286/200000  | Episode Reward: 10.0  | Average Reward 3.61  | Actor loss: -0.43 | Critic loss: 1.89 | Entropy loss: -0.0273  | Total Loss: 1.43 | Total Steps: 121
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 287/200000  | Episode Reward: 10.0  | Average Reward 3.62  | Actor loss: 3.51 | Critic loss: 11.77 | Entropy loss: -0.0068  | Total Loss: 15.27 | Total Steps: 20
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 288/200000  | Episode Reward: 7.0  | Average Reward 3.63  | Actor loss: -0.29 | Critic loss: 3.68 | Entropy loss: -0.0263  | Total Loss: 3.36 | Total Steps: 193
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent i

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 311/200000  | Episode Reward: 4.0  | Average Reward 4.03  | Actor loss: -0.54 | Critic loss: 4.64 | Entropy loss: -0.0209  | Total Loss: 4.07 | Total Steps: 168
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 312/200000  | Episode Reward: 4.0  | Average Reward 4.04  | Actor loss: -0.34 | Critic loss: 4.53 | Entropy loss: -0.0151  | Total Loss: 4.18 | Total Steps: 110
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 4.09. Model has been saved-----
Training  | Episode: 313/200000  | Episode Reward: 7.0  | Average Reward 4.09  | Actor loss: -0.15 | Critic loss: 4.15 | Entropy loss: -0.0109  | Total Loss: 3.99

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 335/200000  | Episode Reward: 10.0  | Average Reward 4.12  | Actor loss: 0.34 | Critic loss: 5.50 | Entropy loss: -0.0027  | Total Loss: 5.84 | Total Steps: 26
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 336/200000  | Episode Reward: 10.0  | Average Reward 4.12  | Actor loss: -0.20 | Critic loss: 3.08 | Entropy loss: -0.0217  | Total Loss: 2.86 | Total Steps: 173
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 337/200000  | Episode Reward: 4.0  | Average Reward 4.09  | Actor loss: -0.38 | Critic loss: 5.10 | Entropy loss: -0.0086  | Total Loss: 4.71 | Total Steps: 115
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 338/200000  | Episode Rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 359/200000  | Episode Reward: 10.0  | Average Reward 4.54  | Actor loss: 0.10 | Critic loss: 3.75 | Entropy loss: -0.0019  | Total Loss: 3.85 | Total Steps: 24
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 360/200000  | Episode Reward: 10.0  | Average Reward 4.54  | Actor loss: 0.52 | Critic loss: 3.98 | Entropy loss: -0.0033  | Total Loss: 4.50 | Total Steps: 24
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 361/200000  | Episode Reward: -5.0  | Average Reward 4.46  | Actor loss: -0.74 | Critic loss: 9.75 | Entropy loss: -0.0250  | Total Loss: 8.99 | Total Steps: 165
--- target colour: blue, target object: capsule ---
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 383/200000  | Episode Reward: 1.0  | Average Reward 4.67  | Actor loss: -0.76 | Critic loss: 6.25 | Entropy loss: -0.0222  | Total Loss: 5.47 | Total Steps: 149
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 384/200000  | Episode Reward: 7.0  | Average Reward 4.66  | Actor loss: -0.45 | Critic loss: 3.69 | Entropy loss: -0.0167  | Total Loss: 3.22 | Total Steps: 125
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 385/200000  | Episode Reward: 4.0  | Average Reward 4.66  | Actor loss: -0.73 | Critic loss: 4.99 | Entropy loss: -0.0192  | Total Loss: 4.25 | Total Steps: 126
--- target colour: black, target object: cube 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 408/200000  | Episode Reward: -2.0  | Average Reward 4.58  | Actor loss: -0.64 | Critic loss: 7.26 | Entropy loss: -0.0281  | Total Loss: 6.59 | Total Steps: 156
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 409/200000  | Episode Reward: 4.0  | Average Reward 4.57  | Actor loss: -0.67 | Critic loss: 4.09 | Entropy loss: -0.0231  | Total Loss: 3.39 | Total Steps: 171
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 410/200000  | Episode Reward: 4.0  | Average Reward 4.59  | Actor loss: -0.17 | Critic loss: 4.20 | Entropy loss: -0.0091  | Total Loss: 4.02 | Total Ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 433/200000  | Episode Reward: 7.0  | Average Reward 4.70  | Actor loss: -0.25 | Critic loss: 4.49 | Entropy loss: -0.0222  | Total Loss: 4.22 | Total Steps: 164
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 434/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: 2.05 | Critic loss: 9.50 | Entropy loss: -0.0028  | Total Loss: 11.54 | Total Steps: 20
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 435/200000  | Episode Reward: -5.0  | Average Reward 4.68  | Actor loss: 0.46 | Critic loss: 3.03 | Entropy loss: -0.0059  | Total Loss: 3.49 | Total Steps: 294
--- target colour: black, target objec

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 457/200000  | Episode Reward: 7.0  | Average Reward 4.74  | Actor loss: -0.45 | Critic loss: 2.87 | Entropy loss: -0.0179  | Total Loss: 2.41 | Total Steps: 115
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 458/200000  | Episode Reward: 4.0  | Average Reward 4.71  | Actor loss: -0.44 | Critic loss: 4.41 | Entropy loss: -0.0138  | Total Loss: 3.96 | Total Steps: 105
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 459/200000  | Episode Reward: -1.0  | Average Reward 4.66  | Actor loss: -0.62 | Critic loss: 6.20 | Entropy loss: -0.0219  | Total Loss: 5.55 | Total Steps: 159
--- target colour: blue, target object: prism ---
Agent in terminal steps
T

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.09. Model has been saved-----
Training  | Episode: 481/200000  | Episode Reward: 4.0  | Average Reward 5.09  | Actor loss: -0.24 | Critic loss: 8.66 | Entropy loss: -0.0108  | Total Loss: 8.41 | Total Steps: 74
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.125. Model has been saved-----
Training  | Episode: 482/200000  | Episode Reward: 4.0  | Average Reward 5.12  | Actor loss: -0.31 | Critic loss: 4.52 | Entropy loss: -0.0210  | Total Loss: 4.19 | Total Steps: 169
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.17. Model has been

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 505/200000  | Episode Reward: 4.0  | Average Reward 5.14  | Actor loss: -0.88 | Critic loss: 5.37 | Entropy loss: -0.0264  | Total Loss: 4.46 | Total Steps: 155
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 506/200000  | Episode Reward: 10.0  | Average Reward 5.14  | Actor loss: 0.17 | Critic loss: 6.69 | Entropy loss: -0.0005  | Total Loss: 6.86 | Total Steps: 17
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 507/200000  | Episode Reward: 1.0  | Average Reward 5.09  | Actor loss: -0.62 | Critic loss: 6.13 | Entropy loss: -0.0179  | Total Loss: 5.49 | Total Steps: 157
--- target colour: blue, target object: sphere ---
Decision Step reward: -

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 530/200000  | Episode Reward: 4.0  | Average Reward 5.04  | Actor loss: -0.72 | Critic loss: 5.38 | Entropy loss: -0.0134  | Total Loss: 4.65 | Total Steps: 106
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 531/200000  | Episode Reward: 7.0  | Average Reward 5.09  | Actor loss: -0.17 | Critic loss: 4.82 | Entropy loss: -0.0225  | Total Loss: 4.62 | Total Steps: 174
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 532/200000  | Episode Reward: 1.0  | Average Reward 5.08  | Actor loss: -0.33 | Critic loss: 6.28 | Entropy loss: -0.0185  | Total Loss: 5.93 | Total Steps: 178
--- target colour: blue, target object: capsule ---
Agent in terminal steps


Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 555/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.59 | Critic loss: 4.93 | Entropy loss: -0.0208  | Total Loss: 4.32 | Total Steps: 177
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 556/200000  | Episode Reward: 4.0  | Average Reward 4.80  | Actor loss: -0.39 | Critic loss: 5.75 | Entropy loss: -0.0218  | Total Loss: 5.33 | Total Steps: 156
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 557/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.54 | Critic loss: 5.78 | Entropy loss: -0.0101  | Total Loss: 5.23 | Total Steps: 106
--- target colour: black, target object: capsule -

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 581/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.50 | Critic loss: 4.65 | Entropy loss: -0.0259  | Total Loss: 4.13 | Total Steps: 240
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 582/200000  | Episode Reward: 7.0  | Average Reward 4.74  | Actor loss: -0.28 | Critic loss: 4.47 | Entropy loss: -0.0096  | Total Loss: 4.18 | Total Steps: 111
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 583/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: -0.14 | Critic loss: 2.59 | Entropy loss: -0.0184  | Total Loss: 2.43 | Total Steps: 126
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 605/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: -0.01 | Critic loss: 1.74 | Entropy loss: -0.0199  | Total Loss: 1.71 | Total Steps: 119
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 606/200000  | Episode Reward: 1.0  | Average Reward 4.66  | Actor loss: -0.63 | Critic loss: 5.55 | Entropy loss: -0.0219  | Total Loss: 4.90 | Total Steps: 127
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 607/200000  | Episode Reward: 1.0  | Average Reward 4.63  | Actor loss: -0.47 | Critic loss: 4.86 | Entropy loss: -0.0348  | Total Loss: 4.36 | Total Steps: 205
--- target colour: yellow, target object: c

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 629/200000  | Episode Reward: -2.0  | Average Reward 4.36  | Actor loss: -1.19 | Critic loss: 4.93 | Entropy loss: -0.0421  | Total Loss: 3.70 | Total Steps: 242
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 630/200000  | Episode Reward: 10.0  | Average Reward 4.39  | Actor loss: 0.17 | Critic loss: 4.69 | Entropy loss: -0.0014  | Total Loss: 4.85 | Total Steps: 24
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 631/200000  | Episode Reward: 10.0  | Average Reward 4.39  | Actor loss: -0.12 | Critic loss: 2.40 | Entropy loss: -0.0320  | Total Loss: 2.25 | Total Steps: 167
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 654/200000  | Episode Reward: 4.0  | Average Reward 4.33  | Actor loss: -0.49 | Critic loss: 7.45 | Entropy loss: -0.0117  | Total Loss: 6.96 | Total Steps: 79
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 655/200000  | Episode Reward: 7.0  | Average Reward 4.37  | Actor loss: -0.31 | Critic loss: 4.64 | Entropy loss: -0.0080  | Total Loss: 4.32 | Total Steps: 91
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 656/200000  | Episode Reward: 10.0  | Average Reward 4.40  | Actor loss: 0.13 | Critic loss: 2.34 | Entropy loss: -0.0125  | Total Loss: 2.45 | Total Steps: 101
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 657/2000

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 680/200000  | Episode Reward: 4.0  | Average Reward 4.30  | Actor loss: -0.41 | Critic loss: 4.49 | Entropy loss: -0.0201  | Total Loss: 4.06 | Total Steps: 166
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 681/200000  | Episode Reward: 4.0  | Average Reward 4.30  | Actor loss: -0.37 | Critic loss: 4.52 | Entropy loss: -0.0208  | Total Loss: 4.12 | Total Steps: 149
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 682/200000  | Episode Reward: 10.0  | Average Reward 4.33  | Actor loss: 1.45 | Critic loss: 8.11 | Entropy loss: -0.0022  | Total Loss: 9.55 | Total Steps: 18
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 705/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.45 | Critic loss: 4.60 | Entropy loss: -0.0152  | Total Loss: 4.13 | Total Steps: 106
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 706/200000  | Episode Reward: 1.0  | Average Reward 4.17  | Actor loss: -0.63 | Critic loss: 5.40 | Entropy loss: -0.0262  | Total Loss: 4.74 | Total Steps: 160
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 707/200000  | Episode Reward: 10.0  | Average Reward 4.21  | Actor loss: 1.96 | Critic loss: 7.69 | Entropy loss: -0.0035  | Total Loss: 9.64 | Total Steps: 18
--- target colour: black, target object: cylinder ---
Decision Step 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 729/200000  | Episode Reward: 7.0  | Average Reward 3.98  | Actor loss: -0.20 | Critic loss: 3.61 | Entropy loss: -0.0211  | Total Loss: 3.39 | Total Steps: 146
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 730/200000  | Episode Reward: 0.0  | Average Reward 3.96  | Actor loss: -0.43 | Critic loss: 6.47 | Entropy loss: -0.0197  | Total Loss: 6.01 | Total Steps: 150
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 731/200000  | Episode Reward: 4.0  | Average Reward 3.95  | Actor loss: -0.41 | Critic loss: 4.77 | Entropy loss: -0.0193  | Total Loss: 4.34 | Total Steps: 155
--- target colour: yellow, target object: cylind

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 753/200000  | Episode Reward: 10.0  | Average Reward 3.54  | Actor loss: -0.19 | Critic loss: 2.14 | Entropy loss: -0.0178  | Total Loss: 1.93 | Total Steps: 112
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 754/200000  | Episode Reward: 10.0  | Average Reward 3.54  | Actor loss: 0.11 | Critic loss: 8.12 | Entropy loss: -0.0004  | Total Loss: 8.23 | Total Steps: 21
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 755/200000  | Episode Reward: 10.0  | Average Reward 3.56  | Actor loss: 0.35 | Critic loss: 10.15 | Entropy loss: -0.0005  | Total Loss: 10.50 | Total Steps: 17
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 756/200000  | Episode Reward: 10.0  | Average Reward 3.60  | Actor loss: 0.09

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 779/200000  | Episode Reward: 1.0  | Average Reward 3.44  | Actor loss: -0.57 | Critic loss: 5.75 | Entropy loss: -0.0250  | Total Loss: 5.15 | Total Steps: 178
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 780/200000  | Episode Reward: 10.0  | Average Reward 3.48  | Actor loss: 0.33 | Critic loss: 4.20 | Entropy loss: -0.0025  | Total Loss: 4.52 | Total Steps: 25
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 781/200000  | Episode Reward: 10.0  | Average Reward 3.52  | Actor loss: 0.36 | Critic loss: 4.02 | Entropy loss: -0.0023  | Total Loss: 4.37 | Total Steps: 24
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Term

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 804/200000  | Episode Reward: -11.0  | Average Reward 3.55  | Actor loss: -0.10 | Critic loss: 4.29 | Entropy loss: -0.0094  | Total Loss: 4.18 | Total Steps: 371
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 805/200000  | Episode Reward: 10.0  | Average Reward 3.55  | Actor loss: -0.67 | Critic loss: 3.15 | Entropy loss: -0.0252  | Total Loss: 2.45 | Total Steps: 143
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 806/200000  | Episode Reward: 4.0  | Average Reward 3.56  | Actor loss: -0.37 | Critic loss: 5.28 | Entropy loss: -0.013

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 829/200000  | Episode Reward: -2.0  | Average Reward 3.52  | Actor loss: -0.69 | Critic loss: 7.15 | Entropy loss: -0.0240  | Total Loss: 6.44 | Total Steps: 171
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 830/200000  | Episode Reward: 1.0  | Average Reward 3.48  | Actor loss: -0.75 | Critic loss: 5.79 | Entropy loss: -0.0236  | Total Loss: 5.02 | Total Steps: 125
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 831/200000  | Episode Reward: 10.0  | Average Reward 3.48  | Actor loss: 0.29 | Critic loss: 5.03 | Entropy loss: -0.0014  | Total Loss: 5.32 | Total Steps: 19
--- target colour: ye

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 854/200000  | Episode Reward: 10.0  | Average Reward 3.54  | Actor loss: 0.40 | Critic loss: 5.41 | Entropy loss: -0.0016  | Total Loss: 5.81 | Total Steps: 19
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 855/200000  | Episode Reward: 4.0  | Average Reward 3.52  | Actor loss: -0.29 | Critic loss: 6.17 | Entropy loss: -0.0121  | Total Loss: 5.86 | Total Steps: 103
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 856/200000  | Episode Reward: 1.0  | Average Reward 3.48  | Actor loss: -0.40 | Critic loss: 6.03 | Entropy loss: -0.0266  | Total Loss: 5.60 | Total Steps: 186
--- target colour: blue, target object: prism ---
Decision Step reward: -3
De

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 879/200000  | Episode Reward: -2.0  | Average Reward 3.31  | Actor loss: -0.54 | Critic loss: 6.14 | Entropy loss: -0.0157  | Total Loss: 5.58 | Total Steps: 159
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 880/200000  | Episode Reward: 10.0  | Average Reward 3.35  | Actor loss: -0.25 | Critic loss: 3.03 | Entropy loss: -0.0083  | Total Loss: 2.77 | Total Steps: 91
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 881/200000  | Episode Reward: 10.0  | Average Reward 3.38  | Actor loss: -0.31 | Critic loss: 2.84 | Entropy loss: -0.0139  | Total Loss: 2.52 | Total Steps: 103
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 903/200000  | Episode Reward: 10.0  | Average Reward 3.12  | Actor loss: 1.40 | Critic loss: 6.59 | Entropy loss: -0.0063  | Total Loss: 7.98 | Total Steps: 27
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 904/200000  | Episode Reward: -14.0  | Average Reward 3.00  | Actor loss: -0.71 | Critic loss: 4.81 | Entropy loss: -0.0237  | Total Loss: 4.07 | Total Steps: 419
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 905/200000  | Episode Reward: 10.0  | Average Reward 3.02  | Actor loss: 0.41 | Critic loss: 8.39 | Entropy loss: -0.0008  | Total Loss: 8.80 | Tot

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 928/200000  | Episode Reward: 10.0  | Average Reward 3.27  | Actor loss: 1.59 | Critic loss: 7.98 | Entropy loss: -0.0020  | Total Loss: 9.56 | Total Steps: 22
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 929/200000  | Episode Reward: 10.0  | Average Reward 3.29  | Actor loss: 0.05 | Critic loss: 4.74 | Entropy loss: -0.0003  | Total Loss: 4.78 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 930/200000  | Episode Reward: 10.0  | Average Reward 3.34  | Actor loss: -0.05 | Critic loss: 2.83 | Entropy loss: -0.0118  | Total Loss: 2.77 | Total Steps: 157
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 931/200000  | Episode Reward: 10.0  | Average Reward 3.37  | Actor loss: 0.

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 954/200000  | Episode Reward: 7.0  | Average Reward 3.87  | Actor loss: -0.45 | Critic loss: 4.59 | Entropy loss: -0.0170  | Total Loss: 4.11 | Total Steps: 143
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 955/200000  | Episode Reward: 0.0  | Average Reward 3.81  | Actor loss: -0.87 | Critic loss: 5.03 | Entropy loss: -0.0436  | Total Loss: 4.12 | Total Steps: 240
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 956/200000  | Episode Reward: 10.0  | Average Reward 3.81  | Actor loss: 2.08 | Critic loss: 9.10 | Entropy loss: -0.0059  | Total Loss: 11.18 | Total Steps: 44
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Step: 250
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 979/200000  | Episode Reward: 1.0  | Average Reward 3.87  | Actor loss: -0.62 | Critic loss: 6.28 | Entropy loss: -0.0191  | Total Loss: 5.64 | Total Steps: 169
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 980/200000  | Episode Reward: 10.0  | Average Reward 3.87  | Actor loss: -0.35 | Critic loss: 2.48 | Entropy loss: -0.0173  | Total Loss: 2.12 | Total Steps: 104
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 981/200000  | Episode Reward: 4.0  | Average Reward 3.84  | Actor loss: -0.65 | Critic loss: 5.43 | Entropy loss: -0.0251  | Total Loss: 4.75 | Total Steps: 150
--- target colour: red, target object: cube ---
Agent in terminal steps
T

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1004/200000  | Episode Reward: 10.0  | Average Reward 3.90  | Actor loss: 0.70 | Critic loss: 4.41 | Entropy loss: -0.0015  | Total Loss: 5.11 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1005/200000  | Episode Reward: 1.0  | Average Reward 3.86  | Actor loss: -0.72 | Critic loss: 6.89 | Entropy loss: -0.0289  | Total Loss: 6.14 | Total Steps: 174
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1006/200000  | Episode Reward: 10.0  | Average Reward 3.89  | Actor loss: 0.17 | Critic loss: 3.21 | Entropy loss: -0.0022  | Total Loss: 3.38 | Total Steps: 22
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1030/200000  | Episode Reward: 7.0  | Average Reward 4.43  | Actor loss: -0.39 | Critic loss: 3.90 | Entropy loss: -0.0246  | Total Loss: 3.48 | Total Steps: 178
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1031/200000  | Episode Reward: 10.0  | Average Reward 4.43  | Actor loss: 1.15 | Critic loss: 3.42 | Entropy loss: -0.0029  | Total Loss: 4.56 | Total Steps: 17
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1032/200000  | Episode Reward: -2.0  | Average Reward 4.37  | Actor loss: -1.21 | Critic loss: 9.52 | Entropy loss: -0.0243  | Total Loss: 8.29 | Total Steps: 149
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1056/200000  | Episode Reward: 10.0  | Average Reward 4.67  | Actor loss: 0.06 | Critic loss: 4.02 | Entropy loss: -0.0131  | Total Loss: 4.07 | Total Steps: 90
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1057/200000  | Episode Reward: -22.0  | Average Reward 4.59  | Actor loss: -0.52 | Critic loss: 4.02 | Entropy loss: -0.0210  | Total Loss: 3.48 | Total Steps: 379
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1058/200000  | Episode Reward: -6.0  | Average Reward 4.51  | Actor loss: -1.19 | Critic loss: 11.26 | Entropy loss: -0.0192  | Total Loss: 10.06 | Total Steps: 143
---

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1081/200000  | Episode Reward: -20.0  | Average Reward 4.38  | Actor loss: -0.36 | Critic loss: 7.39 | Entropy loss: -0.0136  | Total Loss: 7.01 | Total Steps: 373
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1082/200000  | Episode Reward: 4.0  | Average Reward 4.34  | Actor loss: -0.47 | Critic loss: 6.32 | Entropy loss: -0.0147  | Total Loss: 5.84 | Total Steps: 110
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1083/200000  | Epi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1105/200000  | Episode Reward: 10.0  | Average Reward 4.71  | Actor loss: 0.05 | Critic loss: 4.68 | Entropy loss: -0.0003  | Total Loss: 4.73 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1106/200000  | Episode Reward: 4.0  | Average Reward 4.69  | Actor loss: -0.57 | Critic loss: 5.36 | Entropy loss: -0.0247  | Total Loss: 4.76 | Total Steps: 174
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1107/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0.21 | Critic loss: 3.28 | Entropy loss: -0.0024  | Total Loss: 3.49 | Total Steps: 23
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1131/200000  | Episode Reward: -5.0  | Average Reward 4.58  | Actor loss: -0.64 | Critic loss: 6.93 | Entropy loss: -0.0189  | Total Loss: 6.27 | Total Steps: 173
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1132/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 1.03 | Critic loss: 8.50 | Entropy loss: -0.0043  | Total Loss: 9.52 | Total Steps: 36
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1133/200000  | Episode Reward: 4.0  | Average Reward 4.58  | Actor loss: -0.43 | Critic loss: 5.14 | Entropy loss: -0.0108  | Total Loss: 4.70 | Total Steps: 103
--- target colour: gr

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1156/200000  | Episode Reward: -5.0  | Average Reward 4.54  | Actor loss: -1.02 | Critic loss: 8.35 | Entropy loss: -0.0208  | Total Loss: 7.31 | Total Steps: 161
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1157/200000  | Episode Reward: 10.0  | Average Reward 4.59  | Actor loss: 1.27 | Critic loss: 4.70 | Entropy loss: -0.0023  | Total Loss: 5.96 | Total Steps: 19
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1158/200000  | Episode Reward: 4.0  | Average Reward 4.59  | Actor loss: -0.75 | Critic loss: 3.51 | Entropy loss: -0.0197  | Total Loss: 2.74 | Total Steps: 125
--- target colour: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1181/200000  | Episode Reward: -2.0  | Average Reward 4.45  | Actor loss: -0.70 | Critic loss: 7.39 | Entropy loss: -0.0255  | Total Loss: 6.66 | Total Steps: 169
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1182/200000  | Episode Reward: 4.0  | Average Reward 4.42  | Actor loss: -0.62 | Critic loss: 5.23 | Entropy loss: -0.0131  | Total Loss: 4.60 | Total Steps: 107
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1183/200000  | Episode Reward: 4.0  | Average Reward 4.38  | Actor loss: -0.44 | Critic loss: 4.68 | Entropy loss: -0.0153  | Total Loss: 4.23 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1206/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: -0.10 | Critic loss: 2.92 | Entropy loss: -0.0161  | Total Loss: 2.80 | Total Steps: 111
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1207/200000  | Episode Reward: -8.0  | Average Reward 4.49  | Actor loss: 0.33 | Critic loss: 12.93 | Entropy loss: -0.0003  | Total Loss: 13.26 | Total Steps: 259
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1208/200000  | Episode Reward: 1.0  | Average Reward 4.49  | Actor loss: -0.68 | Critic loss: 6.25 | Entropy loss: -0.0156 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1230/200000  | Episode Reward: -11.0  | Average Reward 4.05  | Actor loss: 0.04 | Critic loss: 11.18 | Entropy loss: -0.0001  | Total Loss: 11.22 | Total Steps: 256
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1231/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: 0.43 | Critic loss: 6.21 | Entropy loss: -0.0014  | Total Loss: 6.65 | Total Steps: 17
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1232/200000  | Episode Reward: 4.0  | Average Reward 4.08  | Actor loss: 0.13 | Critic loss: 8.83 | Entropy loss:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1255/200000  | Episode Reward: 1.0  | Average Reward 3.92  | Actor loss: -0.56 | Critic loss: 5.30 | Entropy loss: -0.0326  | Total Loss: 4.70 | Total Steps: 185
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1256/200000  | Episode Reward: 1.0  | Average Reward 3.88  | Actor loss: -0.64 | Critic loss: 6.20 | Entropy loss: -0.0221  | Total Loss: 5.53 | Total Steps: 154
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1257/200000  | Episode Reward: 1.0  | Average Reward 4.00  | Actor loss: -0.96 | Critic loss: 5.12 | Entropy loss: -0.0357  | Total 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1280/200000  | Episode Reward: 4.0  | Average Reward 4.03  | Actor loss: -0.26 | Critic loss: 4.93 | Entropy loss: -0.0166  | Total Loss: 4.65 | Total Steps: 122
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1281/200000  | Episode Reward: 10.0  | Average Reward 4.18  | Actor loss: 0.15 | Critic loss: 3.24 | Entropy loss: -0.0016  | Total Loss: 3.39 | Total Steps: 23
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1282/200000  | Episode Reward: 4.0  | Average Reward 4.18  | Actor loss: -0.70 | Critic loss: 5.80 | Entropy loss: -0.0125  | Total Loss: 5.08 | Total Steps: 106
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1305/200000  | Episode Reward: 10.0  | Average Reward 4.32  | Actor loss: -0.25 | Critic loss: 3.58 | Entropy loss: -0.0113  | Total Loss: 3.32 | Total Steps: 106
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1306/200000  | Episode Reward: 10.0  | Average Reward 4.35  | Actor loss: 0.10 | Critic loss: 5.32 | Entropy loss: -0.0003  | Total Loss: 5.42 | Total Steps: 17
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1307/200000  | Episode Reward: 4.0  | Average Reward 4.32  | Actor loss: -0.49 | Critic loss: 3.29 | Entropy loss: -0.0162  | Total Loss: 2.78 | Total Steps: 117
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1330/200000  | Episode Reward: 4.0  | Average Reward 4.17  | Actor loss: -0.30 | Critic loss: 2.45 | Entropy loss: -0.0166  | Total Loss: 2.13 | Total Steps: 411
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1331/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.36 | Critic loss: 4.50 | Entropy loss: -0.0177  | Total Loss: 4.12 | Total Steps: 106
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1332/200000  | Episode Reward: 4.0  | Average Reward 4.18  | Actor loss: -0.25 | Critic loss: 4.55 | Entropy loss: -0.0172  | Total Loss: 4.28 | Total Steps: 116
--- target colour: black, target ob

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1355/200000  | Episode Reward: 4.0  | Average Reward 4.18  | Actor loss: -0.49 | Critic loss: 4.71 | Entropy loss: -0.0208  | Total Loss: 4.20 | Total Steps: 123
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1356/200000  | Episode Reward: 10.0  | Average Reward 4.26  | Actor loss: 0.77 | Critic loss: 6.11 | Entropy loss: -0.0018  | Total Loss: 6.88 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1357/200000  | Episode Reward: 1.0  | Average Reward 4.21  | Actor loss: -0.55 | Critic loss: 5.51 | Entropy loss: -0.0211  | Total Loss: 4.95 | Total Steps: 176
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1381/200000  | Episode Reward: 4.0  | Average Reward 4.50  | Actor loss: -0.58 | Critic loss: 4.60 | Entropy loss: -0.0195  | Total Loss: 4.00 | Total Steps: 153
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1382/200000  | Episode Reward: 4.0  | Average Reward 4.50  | Actor loss: -0.24 | Critic loss: 5.67 | Entropy loss: -0.0213  | Total Loss: 5.41 | Total Steps: 165
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1383/200000  | Episode Reward: -2.0  | Average Reward 4.47  | Actor loss: -0.33 | Critic loss: 6.43 | Entropy loss: -0.0150  | Total Loss: 6.08 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1407/200000  | Episode Reward: 10.0  | Average Reward 4.74  | Actor loss: 0.32 | Critic loss: 4.12 | Entropy loss: -0.0016  | Total Loss: 4.44 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1408/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -1.11 | Critic loss: 6.52 | Entropy loss: -0.0206  | Total Loss: 5.39 | Total Steps: 112
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1409/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: 0.85 | Critic loss: 4.25 | Entropy loss: -0.0017  | Total Loss: 5.10 | Total Steps: 18
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1432/200000  | Episode Reward: 7.0  | Average Reward 4.85  | Actor loss: 1.09 | Critic loss: 7.42 | Entropy loss: -0.0162  | Total Loss: 8.49 | Total Steps: 85
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1433/200000  | Episode Reward: -2.0  | Average Reward 4.79  | Actor loss: -0.44 | Critic loss: 7.63 | Entropy loss: -0.0203  | Total Loss: 7.18 | Total Steps: 172
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1434/200000  | Episode Reward: 10.0  | Average Reward 4.79  | Actor loss: -0.45 | Critic loss: 3.09 | Entropy loss: -0.0254  | Total Loss: 2.61 | Total Steps: 153
--- target colour: black, target object: cylinder ---
Decision Step r

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1456/200000  | Episode Reward: -2.0  | Average Reward 4.71  | Actor loss: -0.43 | Critic loss: 5.58 | Entropy loss: -0.0221  | Total Loss: 5.13 | Total Steps: 202
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1457/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: -0.06 | Critic loss: 2.91 | Entropy loss: -0.0123  | Total Loss: 2.84 | Total Steps: 107
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1458/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: 0.91 | Critic loss: 6.23 | Entropy loss: -0.0013  | Total Loss: 7.14 | Total Steps: 18
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1459/200000  | Episode Reward: 10.0  | Average Rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1481/200000  | Episode Reward: 3.0  | Average Reward 4.66  | Actor loss: -0.48 | Critic loss: 4.34 | Entropy loss: -0.0207  | Total Loss: 3.84 | Total Steps: 128
--- target colour: green, target object: prism ---
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1482/200000  | Episode Reward: 10.0  | Average Reward 4.68  | Actor loss: -0.18 | Critic loss: 2.64 | Entropy loss: -0.0073  | Total Loss: 2.44 | Total Steps: 356
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1483/200000  | Episode Reward: 4.0  | Average Reward 4.73  | Actor loss: -0.33 | Critic loss: 5.48 | Entropy loss: -0.0161  | Total Loss: 5.13 | Total Steps: 110
--- target colour: green, target object: cylinder ---
Step: 250
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1507/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: -0.22 | Critic loss: 2.88 | Entropy loss: -0.0088  | Total Loss: 2.65 | Total Steps: 102
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1508/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: 0.16 | Critic loss: 3.69 | Entropy loss: -0.0030  | Total Loss: 3.85 | Total Steps: 25
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1509/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: -0.12 | Critic loss: 3.15 | Entropy loss: -0.0174  | Total Loss: 3.01 | Total Steps: 106
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1510/200000  | E

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1532/200000  | Episode Reward: -8.0  | Average Reward 4.69  | Actor loss: -0.49 | Critic loss: 3.10 | Entropy loss: -0.0228  | Total Loss: 2.58 | Total Steps: 424
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1533/200000  | Episode Reward: 7.0  | Average Reward 4.69  | Actor loss: -0.68 | Critic loss: 3.30 | Entropy loss: -0.0320  | Total Loss: 2.59 | Total Steps: 181
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1534/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: 2.02 | Critic loss: 5.86 | Entropy loss: -0.0039  | Total Loss: 7.87 | Total Steps: 21
--- targ

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1556/200000  | Episode Reward: 7.0  | Average Reward 4.32  | Actor loss: -0.15 | Critic loss: 3.92 | Entropy loss: -0.0145  | Total Loss: 3.75 | Total Steps: 147
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1557/200000  | Episode Reward: 10.0  | Average Reward 4.37  | Actor loss: 0.64 | Critic loss: 5.90 | Entropy loss: -0.0016  | Total Loss: 6.54 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1558/200000  | Episode Reward: 4.0  | Average Reward 4.33  | Actor loss: -0.70 | Critic loss: 5.32 | Entropy loss: -0.0279  | Total Loss: 4.59 | Total Steps: 150
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1582/200000  | Episode Reward: -26.0  | Average Reward 4.13  | Actor loss: -0.67 | Critic loss: 8.85 | Entropy loss: -0.0175  | Total Loss: 8.16 | Total Steps: 373
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1583/200000  | Episode Reward: -2.0  | Average Reward 4.13  | Actor loss: 0.58 | Critic loss: 5.52 | Entropy loss: -0.0064  | Total Loss: 6.09 | Total Steps: 306
--- target colour: yellow, target object: sphere ---
Dec

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1606/200000  | Episode Reward: -5.0  | Average Reward 3.49  | Actor loss: -0.92 | Critic loss: 6.26 | Entropy loss: -0.0222  | Total Loss: 5.32 | Total Steps: 149
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1607/200000  | Episode Reward: 4.0  | Average Reward 3.46  | Actor loss: -0.16 | Critic loss: 5.61 | Entropy loss: -0.0143  | Total Loss: 5.43 | Total Steps: 108
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1608/200000  | Episode Reward: -2.0  | Average Reward 3.43  | Actor loss: -0.5

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1631/200000  | Episode Reward: 4.0  | Average Reward 3.64  | Actor loss: -0.82 | Critic loss: 4.86 | Entropy loss: -0.0230  | Total Loss: 4.02 | Total Steps: 108
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1632/200000  | Episode Reward: 4.0  | Average Reward 3.62  | Actor loss: -0.44 | Critic loss: 5.22 | Entropy loss: -0.0116  | Total Loss: 4.76 | Total Steps: 106
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1633/200000  | Episode Reward: 1.0  | Average Reward 3.64  | Actor loss: -0.40 | Critic loss: 6.34 | Entropy loss: -0.0135  | Total Loss: 5.93 | Total Steps: 140
--- target colour: blu

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1657/200000  | Episode Reward: 1.0  | Average Reward 3.88  | Actor loss: -0.62 | Critic loss: 6.07 | Entropy loss: -0.0308  | Total Loss: 5.42 | Total Steps: 151
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1658/200000  | Episode Reward: 4.0  | Average Reward 3.85  | Actor loss: -0.65 | Critic loss: 5.49 | Entropy loss: -0.0182  | Total Loss: 4.82 | Total Steps: 106
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1659/200000  | Episode Reward: 10.0  | Average Reward 3.85  | Actor loss: 0.20 | Critic loss: 3.49 | Entropy loss: -0.0124  | Total Loss: 3.68 | Total Steps: 118
--- target colour: black, target object: cube ---
Agent in terminal ste

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1682/200000  | Episode Reward: -2.0  | Average Reward 3.92  | Actor loss: 1.08 | Critic loss: 5.89 | Entropy loss: -0.0064  | Total Loss: 6.96 | Total Steps: 287
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1683/200000  | Episode Reward: 4.0  | Average Reward 3.92  | Actor loss: -0.46 | Critic loss: 4.72 | Entropy loss: -0.0274  | Total Loss: 4.24 | Total Steps: 139
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1684/200000  | Episode Reward: 4.0  | Average Reward 3.90  | Actor loss: -0.34 | Critic loss: 5.05 | Entropy loss: -0.0152  | Total Loss: 4.69 | Total Steps: 143
--- targ

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1707/200000  | Episode Reward: 10.0  | Average Reward 3.88  | Actor loss: -0.23 | Critic loss: 3.24 | Entropy loss: -0.0103  | Total Loss: 3.00 | Total Steps: 112
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1708/200000  | Episode Reward: -5.0  | Average Reward 3.81  | Actor loss: 0.42 | Critic loss: 5.88 | Entropy loss: -0.0014  | Total Loss: 6.30 | Total Steps: 277
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1709/200000  | Episode Reward: 10.0  | Average Reward 3.81  | Actor loss: 0.61 | Critic loss: 6.30 | Entropy loss: -0.0064  | Total Loss: 6.91 | Total Steps: 63
--- target colour: red, target object: sphere ---
Agent in te

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1732/200000  | Episode Reward: -14.0  | Average Reward 3.77  | Actor loss: 0.06 | Critic loss: 7.10 | Entropy loss: -0.0009  | Total Loss: 7.16 | Total Steps: 297
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1733/200000  | Episode Reward: 4.0  | Average Reward 3.76  | Actor loss: -0.44 | Critic loss: 6.16 | Entropy loss: -0.0145  | Total Loss: 5.70 | Total Steps: 109
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1734/200000  | Episode Reward: 4.0  | Av

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1757/200000  | Episode Reward: 1.0  | Average Reward 4.05  | Actor loss: -0.58 | Critic loss: 6.61 | Entropy loss: -0.0135  | Total Loss: 6.01 | Total Steps: 142
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1758/200000  | Episode Reward: -5.0  | Average Reward 4.01  | Actor loss: -0.93 | Critic loss: 7.19 | Entropy loss: -0.0243  | Total Loss: 6.24 | Total Steps: 181
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1759/200000  | Episode Reward: 10.0  | Average Reward 4.04  | Actor loss: -0.09 | Critic loss: 2.64 | Entropy loss: -0.0232  | Total Loss: 2.53 | Total S

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1782/200000  | Episode Reward: 10.0  | Average Reward 4.13  | Actor loss: 1.24 | Critic loss: 7.14 | Entropy loss: -0.0061  | Total Loss: 8.37 | Total Steps: 43
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1783/200000  | Episode Reward: 4.0  | Average Reward 4.17  | Actor loss: -0.37 | Critic loss: 5.41 | Entropy loss: -0.0173  | Total Loss: 5.02 | Total Steps: 150
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1784/200000  | Episode Reward: 4.0  | Average Reward 4.34  | Actor loss: -0.51 | Critic loss: 4.77 | Entropy loss: -0.0196  | Total Loss: 4.24 | Total Steps: 214
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1807/200000  | Episode Reward: 4.0  | Average Reward 4.49  | Actor loss: -0.57 | Critic loss: 4.27 | Entropy loss: -0.0145  | Total Loss: 3.68 | Total Steps: 108
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1808/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: -0.10 | Critic loss: 2.35 | Entropy loss: -0.0104  | Total Loss: 2.25 | Total Steps: 134
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1809/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: 0.11 | Critic loss: 5.64 | Entropy loss: -0.0003  | Total Loss: 5.75 | Total Steps: 21
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1810/200000  | Episode

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1832/200000  | Episode Reward: 1.0  | Average Reward 4.28  | Actor loss: -0.38 | Critic loss: 6.10 | Entropy loss: -0.0111  | Total Loss: 5.71 | Total Steps: 143
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1833/200000  | Episode Reward: 1.0  | Average Reward 4.28  | Actor loss: -0.79 | Critic loss: 6.38 | Entropy loss: -0.0202  | Total Loss: 5.57 | Total Steps: 157
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1834/200000  | Episode Reward: 4.0  | Average Reward 4.25  | Actor loss: -0.24 | Critic loss: 5.37 | Entropy loss: -0.0213  | Total Loss: 5.11 | Total Steps: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1855/200000  | Episode Reward: -11.0  | Average Reward 3.60  | Actor loss: 0.59 | Critic loss: 5.39 | Entropy loss: -0.0051  | Total Loss: 5.98 | Total Steps: 308
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1856/200000  | Episode Reward: -5.0  | Average Reward 3.54  | Actor loss: -0.73 | Critic loss: 8.08 | Entropy loss: -0.0309  | Total Loss: 7.32 | Total Steps: 159
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1879/200000  | Episode Reward: 7.0  | Average Reward 3.27  | Actor loss: -0.36 | Critic loss: 4.24 | Entropy loss: -0.0299  | Total Loss: 3.84 | Total Steps: 185
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1880/200000  | Episode Reward: -2.0  | Average Reward 3.26  | Actor loss: 0.23 | Critic loss: 3.04 | Entropy loss: -0.0093  | Total Loss: 3.26 | Total Steps: 322
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1881/200000  | Episode Reward: 3.0  | Average Reward 3.25  | Actor loss: -0.35 | Critic loss: 5.79 | Entropy loss: -0.0167  | Total Loss: 5.43 | Total Steps: 159
--- target c

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1903/200000  | Episode Reward: 4.0  | Average Reward 2.94  | Actor loss: 0.06 | Critic loss: 4.36 | Entropy loss: -0.0281  | Total Loss: 4.40 | Total Steps: 156
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1904/200000  | Episode Reward: 4.0  | Average Reward 2.94  | Actor loss: -0.21 | Critic loss: 4.02 | Entropy loss: -0.0112  | Total Loss: 3.80 | Total Steps: 318
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1905/200000  | Episode Reward: 10.0  | Average Reward 3.00  | Actor loss: 0.71 | Critic loss: 6.09 | Entropy loss: -0.0015  | Total Loss: 6.80 | Total Steps: 16
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1928/200000  | Episode Reward: 10.0  | Average Reward 2.90  | Actor loss: 0.08 | Critic loss: 5.16 | Entropy loss: -0.0004  | Total Loss: 5.24 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1929/200000  | Episode Reward: 10.0  | Average Reward 2.94  | Actor loss: 0.63 | Critic loss: 5.33 | Entropy loss: -0.0016  | Total Loss: 5.96 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1930/200000  | Episode Reward: 10.0  | Average Reward 2.95  | Actor loss: 0.15 | Critic loss: 4.88 | Entropy loss: -0.0006  | Total Loss: 5.03 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1931/200000  | Episode Reward: 7.0  | Average 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1953/200000  | Episode Reward: -2.0  | Average Reward 2.79  | Actor loss: -1.34 | Critic loss: 6.16 | Entropy loss: -0.0409  | Total Loss: 4.78 | Total Steps: 159
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1954/200000  | Episode Reward: 4.0  | Average Reward 2.79  | Actor loss: -0.48 | Critic loss: 5.67 | Entropy loss: -0.0113  | Total Loss: 5.19 | Total Steps: 101
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1955/200000  | Episode Reward: 3.0  | Average Reward 2.75  | Actor loss: -0.95 | Critic loss: 5.77 | Entropy loss: -0.0198  | Total Loss: 4.80 | Total Ste

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1978/200000  | Episode Reward: 1.0  | Average Reward 2.83  | Actor loss: -0.74 | Critic loss: 6.18 | Entropy loss: -0.0148  | Total Loss: 5.43 | Total Steps: 138
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1979/200000  | Episode Reward: 4.0  | Average Reward 2.80  | Actor loss: -0.22 | Critic loss: 5.36 | Entropy loss: -0.0114  | Total Loss: 5.13 | Total Steps: 137
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 1980/200000  | Episode Reward: 1.0  | Average Reward 2.79  | Actor loss: -0.37 | Critic loss: 6.03 | Entropy loss: -0.0132  | Total Loss: 5.65 | Total Ste

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2003/200000  | Episode Reward: -8.0  | Average Reward 2.69  | Actor loss: 0.44 | Critic loss: 6.82 | Entropy loss: -0.0013  | Total Loss: 7.25 | Total Steps: 269
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 2004/200000  | Episode Reward: -49.0  | Average Reward 2.40  | Actor loss: -1.28 | Critic loss: 11.33 | Entropy loss: -0.0269  | Total Loss: 10.02 | Total Steps: 500
--- 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2028/200000  | Episode Reward: 4.0  | Average Reward 2.73  | Actor loss: -0.69 | Critic loss: 5.39 | Entropy loss: -0.0187  | Total Loss: 4.68 | Total Steps: 121
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2029/200000  | Episode Reward: 7.0  | Average Reward 2.71  | Actor loss: -0.27 | Critic loss: 4.06 | Entropy loss: -0.0175  | Total Loss: 3.78 | Total Steps: 189
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2030/200000  | Episode Reward: 10.0  | Average Reward 2.71  | Actor loss: 0.49 | Critic loss: 3.96 | Entropy loss: -0.0028  | Total Loss: 4.45 | Total Steps: 20
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | E

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2053/200000  | Episode Reward: 10.0  | Average Reward 3.16  | Actor loss: -0.05 | Critic loss: 2.61 | Entropy loss: -0.0166  | Total Loss: 2.55 | Total Steps: 121
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2054/200000  | Episode Reward: 10.0  | Average Reward 3.22  | Actor loss: 0.03 | Critic loss: 2.77 | Entropy loss: -0.0094  | Total Loss: 2.78 | Total Steps: 123
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2055/200000  | Episode Reward: 4.0  | Average Reward 3.29  | Actor loss: -0.30 | Critic loss: 5.33 | Entropy loss: -0.0146  | Total Loss: 5.01 | Total Steps: 159
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2056/200000  |

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2078/200000  | Episode Reward: 4.0  | Average Reward 3.57  | Actor loss: -0.12 | Critic loss: 6.19 | Entropy loss: -0.0113  | Total Loss: 6.06 | Total Steps: 100
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2079/200000  | Episode Reward: 4.0  | Average Reward 3.56  | Actor loss: -0.50 | Critic loss: 3.54 | Entropy loss: -0.0187  | Total Loss: 3.02 | Total Steps: 143
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2080/200000  | Episode Reward: 7.0  | Average Reward 3.60  | Actor loss: -0.05 | Critic loss: 3.17 | Entropy loss: -0.0132  | Total Loss: 3.10 | Total Steps: 147
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2103/200000  | Episode Reward: 10.0  | Average Reward 3.73  | Actor loss: 1.06 | Critic loss: 6.59 | Entropy loss: -0.0018  | Total Loss: 7.65 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2104/200000  | Episode Reward: 10.0  | Average Reward 3.75  | Actor loss: 0.79 | Critic loss: 7.72 | Entropy loss: -0.0107  | Total Loss: 8.50 | Total Steps: 59
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2105/200000  | Episode Reward: -2.0  | Average Reward 3.69  | Actor loss: -0.49 | Critic loss: 5.80 | Entropy loss: -0.0138  | Total Loss: 5.30 | Total Steps: 167
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step re

Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 2126/200000  | Episode Reward: -49.0  | Average Reward 3.44  | Actor loss: -1.01 | Critic loss: 6.74 | Entropy loss: -0.0353  | Total Loss: 5.70 | Total Steps: 500
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2127/200000  | Episode Reward: 1.0  | Average Reward 3.46  | Actor loss: -0.11 | Critic loss: 2.59 | Entropy loss: -0.0215  | Total Loss: 2.46 | Total Steps: 406
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2128/200000  | Episode Reward: 3.0  | Average Reward 3.42  | Actor loss: -0.31 | Critic loss: 5.00 | Entropy loss: -0.0194  | Total Loss: 4.67 | Total Steps: 117
--- target colour: yellow, target object: cylinder ---
Ag

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2151/200000  | Episode Reward: 4.0  | Average Reward 3.29  | Actor loss: -0.19 | Critic loss: 4.01 | Entropy loss: -0.0230  | Total Loss: 3.79 | Total Steps: 165
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2152/200000  | Episode Reward: 1.0  | Average Reward 3.29  | Actor loss: -0.27 | Critic loss: 5.63 | Entropy loss: -0.0133  | Total Loss: 5.35 | Total Steps: 155
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2175/200000  | Episode Reward: 4.0  | Average Reward 2.85  | Actor loss: -0.56 | Critic loss: 5.82 | Entropy loss: -0.0102  | Total Loss: 5.24 | Total Steps: 110
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2176/200000  | Episode Reward: 10.0  | Average Reward 2.91  | Actor loss: 0.17 | Critic loss: 5.86 | Entropy loss: -0.0005  | Total Loss: 6.02 | Total Steps: 21
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2177/200000  | Episode Reward: 4.0  | Average Reward 2.91  | Actor loss: 0.02 | Critic loss: 6.26 | Entropy loss: -0.0103  | Total Loss: 6.26 | Total Steps: 100
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2198/200000  | Episode Reward: 10.0  | Average Reward 2.72  | Actor loss: 0.60 | Critic loss: 4.82 | Entropy loss: -0.0034  | Total Loss: 5.41 | Total Steps: 19
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2199/200000  | Episode Reward: 1.0  | Average Reward 2.67  | Actor loss: -0.52 | Critic loss: 5.92 | Entropy loss: -0.0198  | Total Loss: 5.38 | Total Steps: 129
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2200/200000  | Episode Reward: 10.0  | Average Reward 2.69  | Actor loss: 0.16 | Critic loss: 5.67 | Entropy loss: -0.0006  | Total Loss: 5.83 | Total Steps: 21
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Ep

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2223/200000  | Episode Reward: -2.0  | Average Reward 2.77  | Actor loss: -0.58 | Critic loss: 6.22 | Entropy loss: -0.0172  | Total Loss: 5.63 | Total Steps: 173
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2224/200000  | Episode Reward: -5.0  | Average Reward 2.71  | Actor loss: -0.27 | Critic loss: 8.28 | Entropy loss: -0.0147  | Total Loss: 8.00 | Total Steps: 168
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2225/200000  | Episode Reward: 7.0  | Average Reward 2.71  | Actor loss: -0.15 | Critic loss: 3.34 | Entr

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2247/200000  | Episode Reward: 1.0  | Average Reward 2.39  | Actor loss: -0.34 | Critic loss: 5.30 | Entropy loss: -0.0151  | Total Loss: 4.95 | Total Steps: 142
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2248/200000  | Episode Reward: 10.0  | Average Reward 2.40  | Actor loss: -0.12 | Critic loss: 3.01 | Entropy loss: -0.0173  | Total Loss: 2.87 | Total Steps: 118
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2249/200000  | Episode Reward: 10.0  | Average Reward 2.40  | Actor loss: -0.04 | Critic loss: 3.48 | Entropy loss: -0.0115  | Total Loss: 3.43 | Total Steps: 101
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2273/200000  | Episode Reward: 4.0  | Average Reward 2.50  | Actor loss: -0.41 | Critic loss: 6.10 | Entropy loss: -0.0189  | Total Loss: 5.67 | Total Steps: 164
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2274/200000  | Episode Reward: -8.0  | Average Reward 2.45  | Actor loss: 0.71 | Critic loss: 6.08 | Entropy loss: -0.0057  | Total Loss: 6.78 | Total Steps: 288
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2275/200000  | Episode Reward: -2.0  | Aver

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2296/200000  | Episode Reward: 1.0  | Average Reward 2.10  | Actor loss: -0.69 | Critic loss: 6.47 | Entropy loss: -0.0236  | Total Loss: 5.76 | Total Steps: 131
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2297/200000  | Episode Reward: 10.0  | Average Reward 2.14  | Actor loss: 0.13 | Critic loss: 5.89 | Entropy loss: -0.0004  | Total Loss: 6.01 | Total Steps: 17
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2298/200000  | Episode Reward: 10.0  | Average Reward 2.20  | Actor loss: 0.15 | Critic loss: 5.47 | Entropy loss: -0.0005  | Total Loss: 5.62 | Total Steps: 21
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2321/200000  | Episode Reward: -2.0  | Average Reward 2.00  | Actor loss: -0.10 | Critic loss: 3.27 | Entropy loss: -0.0062  | Total Loss: 3.15 | Total Steps: 363
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2322/200000  | Episode Reward: 4.0  | Average Reward 2.10  | Actor loss: -0.21 | Critic loss: 4.83 | Entropy loss: -0.0126  | Total Loss: 4.61 | Total Steps: 115
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2323/200000  | Episode Reward: 1.0  | Average Reward 2.09  | Actor loss: -0.66 | Critic loss: 6.21 | Entropy loss: -0.016

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2346/200000  | Episode Reward: 10.0  | Average Reward 2.71  | Actor loss: 0.74 | Critic loss: 4.36 | Entropy loss: -0.0031  | Total Loss: 5.10 | Total Steps: 19
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2347/200000  | Episode Reward: 1.0  | Average Reward 2.70  | Actor loss: -0.30 | Critic loss: 4.97 | Entropy loss: -0.0162  | Total Loss: 4.65 | Total Steps: 127
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2348/200000  | Episode Reward: 4.0  | Average Reward 2.67  | Actor loss: -0.49 | Critic loss: 4.90 | Entropy loss: -0.0168  | Total Loss: 4.40 | Total Steps: 115
--- target colour: green, target object: sphere ---
Agent in terminal steps


Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2371/200000  | Episode Reward: 7.0  | Average Reward 3.07  | Actor loss: -0.35 | Critic loss: 3.32 | Entropy loss: -0.0204  | Total Loss: 2.95 | Total Steps: 222
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2372/200000  | Episode Reward: 10.0  | Average Reward 3.07  | Actor loss: 0.35 | Critic loss: 5.32 | Entropy loss: -0.0009  | Total Loss: 5.68 | Total Steps: 17
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2373/200000  | Episode Reward: 7.0  | Average Reward 3.07  | Actor loss: -0.32 | Critic loss: 4.32 | Entropy loss: -0.0164  | Total Loss: 3.99 | Total Steps: 139
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Termi

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2397/200000  | Episode Reward: 1.0  | Average Reward 3.54  | Actor loss: -0.44 | Critic loss: 5.27 | Entropy loss: -0.0177  | Total Loss: 4.82 | Total Steps: 142
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2398/200000  | Episode Reward: 10.0  | Average Reward 3.54  | Actor loss: 0.05 | Critic loss: 4.85 | Entropy loss: -0.0002  | Total Loss: 4.90 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2399/200000  | Episode Reward: 4.0  | Average Reward 3.56  | Actor loss: -0.11 | Critic loss: 5.52 | Entropy loss: -0.0091  | Total Loss: 5.40 | Total Steps: 96
--- target colour: green, target object: prism ---
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2421/200000  | Episode Reward: 10.0  | Average Reward 3.33  | Actor loss: 1.35 | Critic loss: 4.71 | Entropy loss: -0.0032  | Total Loss: 6.06 | Total Steps: 20
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2422/200000  | Episode Reward: 10.0  | Average Reward 3.35  | Actor loss: 0.11 | Critic loss: 2.65 | Entropy loss: -0.0164  | Total Loss: 2.74 | Total Steps: 127
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2423/200000  | Episode Reward: 7.0  | Average Reward 3.40  | Actor loss: -0.11 | Critic loss: 4.55 | Entropy loss: -0.0265  | Total Loss: 4.41 | Total Steps: 174
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2424/200000  | Episode Reward: 10.0  | Average Rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2446/200000  | Episode Reward: 10.0  | Average Reward 3.61  | Actor loss: 0.37 | Critic loss: 4.49 | Entropy loss: -0.0164  | Total Loss: 4.84 | Total Steps: 85
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2447/200000  | Episode Reward: 4.0  | Average Reward 3.62  | Actor loss: -0.34 | Critic loss: 4.97 | Entropy loss: -0.0154  | Total Loss: 4.61 | Total Steps: 139
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2448/200000  | Episode Reward: 1.0  | Average Reward 3.58  | Actor loss: -0.74 | Critic loss: 8.55 | Entropy loss: -0.0170  | Total Loss: 7.80 | Total Steps: 139
--- target colour: red, target object: sphere ---
Decision Step reward: -3
D

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2470/200000  | Episode Reward: 4.0  | Average Reward 3.18  | Actor loss: -0.60 | Critic loss: 5.04 | Entropy loss: -0.0147  | Total Loss: 4.42 | Total Steps: 110
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2471/200000  | Episode Reward: 10.0  | Average Reward 3.18  | Actor loss: 0.89 | Critic loss: 5.36 | Entropy loss: -0.0020  | Total Loss: 6.25 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2472/200000  | Episode Reward: 10.0  | Average Reward 3.21  | Actor loss: 0.12 | Critic loss: 3.42 | Entropy loss: -0.0158  | Total Loss: 3.52 | Total Steps: 121
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2496/200000  | Episode Reward: -2.0  | Average Reward 3.99  | Actor loss: -0.71 | Critic loss: 5.28 | Entropy loss: -0.0123  | Total Loss: 4.56 | Total Steps: 361
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2497/200000  | Episode Reward: 7.0  | Average Reward 3.98  | Actor loss: -0.44 | Critic loss: 4.81 | Entropy loss: -0.0116  | Total Loss: 4.36 | Total Steps: 111
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2498/200000  | Episode Reward: 10.0  | Average Reward 3.98  | Actor loss: -0.02 | Critic loss: 2.82 | Entropy loss: -0.0108  | Total Loss: 2.79 | Total Steps: 105
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2520/200000  | Episode Reward: -8.0  | Average Reward 3.69  | Actor loss: -0.60 | Critic loss: 6.14 | Entropy loss: -0.0179  | Total Loss: 5.52 | Total Steps: 206
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2521/200000  | Episode Reward: 10.0  | Average Reward 3.75  | Actor loss: 0.54 | Critic loss: 6.78 | Entropy loss: -0.0108  | Total Loss: 7.31 | Total Steps: 73
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2522/200000  | Episode Reward: 10.0  | Average Reward 3.78  | Actor loss: 0.03 | Critic loss: 4.01 | Entropy loss: -0.0002  | Total Loss: 4.04 | Total Steps: 21
--- target colour: green, target object: cyli

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2546/200000  | Episode Reward: 7.0  | Average Reward 3.88  | Actor loss: -0.33 | Critic loss: 3.34 | Entropy loss: -0.0222  | Total Loss: 2.99 | Total Steps: 208
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 2547/200000  | Episode Reward: -31.0  | Average Reward 3.73  | Actor loss: -0.86 | Critic loss: 10.33 | Entropy loss: -0.0282  | Total Loss: 9.44 | Total Steps: 500
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2548/200000  | Episode Reward: 4.0  | Average Reward 3.73  | Actor loss: -0.53 | Critic loss: 5.17 | Entropy loss: -

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2569/200000  | Episode Reward: -5.0  | Average Reward 3.56  | Actor loss: -0.62 | Critic loss: 6.52 | Entropy loss: -0.0231  | Total Loss: 5.88 | Total Steps: 220
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2570/200000  | Episode Reward: 4.0  | Average Reward 3.56  | Actor loss: -0.57 | Critic loss: 5.67 | Entropy loss: -0.0182  | Total Loss: 5.08 | Total Steps: 105
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2571/200000  | Episode Reward: 10.0  | Average Reward 3.58  | Actor loss: 1.68 | Critic loss: 4.79 | Entropy loss: -0.0047  | Total Loss: 6.47 | Total Steps: 18
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2593/200000  | Episode Reward: 4.0  | Average Reward 3.29  | Actor loss: -0.43 | Critic loss: 5.50 | Entropy loss: -0.0128  | Total Loss: 5.06 | Total Steps: 137
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2594/200000  | Episode Reward: -8.0  | Average Reward 3.21  | Actor loss: -0.33 | Critic loss: 3.77 | Entropy loss: -0.0108  | Total Loss: 3.44 | Total Steps: 357
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2595/200000  | Episode Reward: 4.0  | Average Reward 3.19  | Actor loss: -0.14 | Critic loss: 4.49 | 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2617/200000  | Episode Reward: 4.0  | Average Reward 3.18  | Actor loss: -0.06 | Critic loss: 5.20 | Entropy loss: -0.0179  | Total Loss: 5.11 | Total Steps: 107
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2618/200000  | Episode Reward: -2.0  | Average Reward 3.12  | Actor loss: 0.22 | Critic loss: 8.67 | Entropy loss: -0.0020  | Total Loss: 8.89 | Total Steps: 291
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2619/200000  | Episode Reward: -3.0  | Average Reward 3.10  | Actor loss: -0.38 | Critic loss: 6.6

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2641/200000  | Episode Reward: 1.0  | Average Reward 2.71  | Actor loss: -0.58 | Critic loss: 6.29 | Entropy loss: -0.0191  | Total Loss: 5.70 | Total Steps: 172
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2642/200000  | Episode Reward: 10.0  | Average Reward 2.79  | Actor loss: 0.15 | Critic loss: 2.63 | Entropy loss: -0.0018  | Total Loss: 2.78 | Total Steps: 23
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2643/200000  | Episode Reward: -11.0  | Average Reward 2.74  | Actor loss: -0.48 | Critic loss: 3.

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2665/200000  | Episode Reward: 10.0  | Average Reward 2.68  | Actor loss: -0.06 | Critic loss: 2.45 | Entropy loss: -0.0163  | Total Loss: 2.38 | Total Steps: 135
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2666/200000  | Episode Reward: 1.0  | Average Reward 2.92  | Actor loss: -0.32 | Critic loss: 6.21 | Entropy loss: -0.0074  | Total Loss: 5.88 | Total Steps: 134
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2667/200000  | Episode Reward: -2.0  | Average Reward 2.85  | Actor loss: -0.29 | Critic loss: 7.02 | Entropy loss: -0.0153  | Total Loss: 6.71 | Total Steps: 173
--- target colo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2690/200000  | Episode Reward: 1.0  | Average Reward 2.63  | Actor loss: -0.46 | Critic loss: 5.49 | Entropy loss: -0.0201  | Total Loss: 5.01 | Total Steps: 170
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2691/200000  | Episode Reward: 1.0  | Average Reward 2.62  | Actor loss: -0.38 | Critic loss: 5.12 | Entropy loss: -0.0175  | Total Loss: 4.72 | Total Steps: 181
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2692/200000  | Episode Reward: 10.0  | Average Reward 2.67  | Actor loss: 0.14 | Critic loss: 3.21 | Entropy loss: -0.0028  | Total Loss: 3.34 | Total Steps: 24
--- target colour: black, target object: capsu

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2715/200000  | Episode Reward: 1.0  | Average Reward 2.94  | Actor loss: -0.24 | Critic loss: 4.79 | Entropy loss: -0.0187  | Total Loss: 4.53 | Total Steps: 142
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2716/200000  | Episode Reward: 6.0  | Average Reward 2.92  | Actor loss: -0.37 | Critic loss: 2.94 | Entropy loss: -0.0177  | Total Loss: 2.55 | Total Steps: 172
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2717/200000  | Episode Reward: 10.0  | Average Reward 2.92  | Actor loss: -0.02 | Critic loss: 1.94 | Entropy loss: -0.0308  | Total Loss: 1.89 | Total Steps: 211
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2740/200000  | Episode Reward: 10.0  | Average Reward 2.88  | Actor loss: 1.09 | Critic loss: 6.29 | Entropy loss: -0.0035  | Total Loss: 7.38 | Total Steps: 18
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2741/200000  | Episode Reward: 3.0  | Average Reward 2.85  | Actor loss: 0.37 | Critic loss: 11.11 | Entropy loss: -0.0004  | Total Loss: 11.49 | Total Steps: 258
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2742/200000  | Episode Reward: 1.0  | Average Reward 2.81  | Actor loss: -0.21 | Critic loss: 4.34 | Entropy loss: -0.0317  | Total Loss: 4.10 | Total Steps: 169
--- target colour: red, target object: sphere ---
Decision St

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2766/200000  | Episode Reward: 4.0  | Average Reward 3.14  | Actor loss: 1.20 | Critic loss: 9.03 | Entropy loss: -0.0024  | Total Loss: 10.23 | Total Steps: 275
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2767/200000  | Episode Reward: 9.0  | Average Reward 3.21  | Actor loss: -0.28 | Critic loss: 2.92 | Entropy loss: -0.0211  | Total Loss: 2.61 | Total Steps: 176
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2768/200000  | Episode Reward: 10.0  | Average Reward 3.38  | Actor loss: 0.21 | Critic loss: 3.64 | Entropy loss: -0.0012  | Total Loss: 3.85 | Total Steps: 19
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2791/200000  | Episode Reward: 10.0  | Average Reward 3.56  | Actor loss: 0.50 | Critic loss: 4.99 | Entropy loss: -0.0015  | Total Loss: 5.49 | Total Steps: 17
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2792/200000  | Episode Reward: 4.0  | Average Reward 3.54  | Actor loss: -0.20 | Critic loss: 3.83 | Entropy loss: -0.0193  | Total Loss: 3.61 | Total Steps: 171
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2793/200000  | Episode Reward: 4.0  | Average Reward 3.54  | Actor loss: -0.75 | Critic loss: 6.11 | Entropy loss: -0.0138  | Total Loss: 5.34 | Total Steps: 121
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2816/200000  | Episode Reward: -5.0  | Average Reward 3.68  | Actor loss: -0.90 | Critic loss: 7.72 | Entropy loss: -0.0239  | Total Loss: 6.80 | Total Steps: 172
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2817/200000  | Episode Reward: 4.0  | Average Reward 3.68  | Actor loss: -0.41 | Critic loss: 5.37 | Entropy loss: -0.0151  | Total Loss: 4.95 | Total Steps: 103
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2818/200000  | Episode Reward: 4.0  | Average Reward 3.71  | Actor loss: -0.37 | Critic loss: 3.95 | Entropy loss: -0.0369  | Total Lo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2842/200000  | Episode Reward: 10.0  | Average Reward 4.23  | Actor loss: 0.68 | Critic loss: 5.38 | Entropy loss: -0.0018  | Total Loss: 6.06 | Total Steps: 17
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2843/200000  | Episode Reward: 7.0  | Average Reward 4.32  | Actor loss: -0.21 | Critic loss: 3.19 | Entropy loss: -0.0279  | Total Loss: 2.95 | Total Steps: 175
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2844/200000  | Episode Reward: 1.0  | Average Reward 4.28  | Actor loss: -0.32 | Critic loss: 5.61 | Entropy loss: -0.0244  | Total Loss: 5.26 | Total Steps: 197
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in term

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2868/200000  | Episode Reward: 4.0  | Average Reward 4.67  | Actor loss: -0.24 | Critic loss: 4.38 | Entropy loss: -0.0127  | Total Loss: 4.14 | Total Steps: 150
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2869/200000  | Episode Reward: 0.0  | Average Reward 4.63  | Actor loss: -0.60 | Critic loss: 5.65 | Entropy loss: -0.0251  | Total Loss: 5.03 | Total Steps: 181
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2870/200000  | Episode Reward: -2.0  | Average Reward 4.63  | Actor loss: -0.65 | Critic loss: 5.50 | Entropy loss: -0.0302  | Total 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2893/200000  | Episode Reward: 1.0  | Average Reward 4.33  | Actor loss: -0.43 | Critic loss: 7.11 | Entropy loss: -0.0121  | Total Loss: 6.67 | Total Steps: 143
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2894/200000  | Episode Reward: 10.0  | Average Reward 4.33  | Actor loss: 0.79 | Critic loss: 7.71 | Entropy loss: -0.0015  | Total Loss: 8.50 | Total Steps: 17
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2895/200000  | Episode Reward: 10.0  | Average Reward 4.33  | Actor loss: 0.08 | Critic loss: 5.93 | Entropy loss: -0.0002  | Total Loss: 6.01 | Total Steps: 17
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 289

Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2917/200000  | Episode Reward: -5.0  | Average Reward 4.31  | Actor loss: 0.69 | Critic loss: 4.53 | Entropy loss: -0.0038  | Total Loss: 5.21 | Total Steps: 286
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2918/200000  | Episode Reward: 10.0  | Average Reward 4.31  | Actor loss: 0.40 | Critic loss: 5.59 | Entropy loss: -0.0011  | Total Loss: 5.99 | Total Steps: 16
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2919/200000  | Episode Reward: 1.0  | Average Reward 4.26  | Actor loss: -0.39 | Critic loss: 5.58 | Entropy loss: -0.0202  | Total Loss: 5.17 | Total Steps: 159
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in t

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2942/200000  | Episode Reward: 10.0  | Average Reward 4.46  | Actor loss: -0.05 | Critic loss: 3.01 | Entropy loss: -0.0098  | Total Loss: 2.96 | Total Steps: 113
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2943/200000  | Episode Reward: 10.0  | Average Reward 4.53  | Actor loss: -0.12 | Critic loss: 2.52 | Entropy loss: -0.0135  | Total Loss: 2.38 | Total Steps: 144
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2944/200000  | Episode Reward: 7.0  | Average Reward 4.51  | Actor loss: -0.14 | Critic loss: 4.04 | Entropy loss: -0.0097  | Total Loss: 3.88 | Total Steps: 135
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2966/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.24 | Critic loss: 5.40 | Entropy loss: -0.0105  | Total Loss: 5.15 | Total Steps: 113
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2967/200000  | Episode Reward: -8.0  | Average Reward 3.88  | Actor loss: -0.37 | Critic loss: 6.51 | Entropy loss: -0.0247  | Total Loss: 6.12 | Total Steps: 230
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2968/200000  | Episode Reward: 4.0  | Average Reward 3.85  | Actor loss: -0.22 | Critic loss: 4.16 | En

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2990/200000  | Episode Reward: -2.0  | Average Reward 3.54  | Actor loss: -0.53 | Critic loss: 3.53 | Entropy loss: -0.0337  | Total Loss: 2.97 | Total Steps: 231
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2991/200000  | Episode Reward: 4.0  | Average Reward 3.50  | Actor loss: 0.03 | Critic loss: 4.16 | Entropy loss: -0.0157  | Total Loss: 4.18 | Total Steps: 105
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 2992/200000  | Episode Reward: 10.0  | Average Reward 3.54  | Actor loss: 1.25 | Critic loss: 6.60 | Entropy loss: -0.0028  | Total Loss: 7.85 | Total Steps: 17
--- target colour: yellow, target object: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3015/200000  | Episode Reward: -5.0  | Average Reward 3.76  | Actor loss: -0.91 | Critic loss: 6.92 | Entropy loss: -0.0282  | Total Loss: 5.99 | Total Steps: 181
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3016/200000  | Episode Reward: -5.0  | Average Reward 3.76  | Actor loss: -0.78 | Critic loss: 5.45 | Entropy loss: -0.0248  | Total Loss: 4.64 | Total Steps: 219
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3017/200000  | Episode Reward: 9.0  | Average Reward 3.79  | Actor loss: -0.17 | Critic loss: 2.69 | Entropy

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3040/200000  | Episode Reward: 4.0  | Average Reward 3.71  | Actor loss: -0.30 | Critic loss: 5.19 | Entropy loss: -0.0113  | Total Loss: 4.88 | Total Steps: 111
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3041/200000  | Episode Reward: 4.0  | Average Reward 3.71  | Actor loss: -0.24 | Critic loss: 4.54 | Entropy loss: -0.0111  | Total Loss: 4.29 | Total Steps: 148
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3042/200000  | Episode Reward: 10.0  | Average Reward 3.71  | Actor loss: -0.12 | Critic loss: 3.11 | Entropy loss: -0.0138  | Total Loss: 2.97 | Total Steps: 111
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3066/200000  | Episode Reward: 3.0  | Average Reward 3.58  | Actor loss: -0.26 | Critic loss: 4.92 | Entropy loss: -0.0314  | Total Loss: 4.63 | Total Steps: 196
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3067/200000  | Episode Reward: 6.0  | Average Reward 3.60  | Actor loss: 0.06 | Critic loss: 3.97 | Entropy loss: -0.0005  | Total Loss: 4.02 | Total Steps: 285
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3068/200000  | Episode Reward: -5.0  | Average Reward 3.56  | Actor loss: -0.69 | Critic loss: 7.06 | Entropy loss: -0.0203  | Total Loss: 6.34 | To

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3090/200000  | Episode Reward: -10.0  | Average Reward 3.65  | Actor loss: 0.15 | Critic loss: 6.31 | Entropy loss: -0.0046  | Total Loss: 6.45 | Total Steps: 305
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 3091/200000  | Episode Reward: -16.0  | Average Reward 3.52  | Actor loss: -0.49 | Critic loss: 5.75 | Entropy loss: -0.0234  | Total Loss: 5.23 | Total Steps: 500
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3092/200000  | Episode Reward: 10.0  | Average Reward 3.52  | Actor loss: 1.28 | Critic loss: 5.08 | Entropy loss: -0.0036  | Total Loss: 6.35 | Total Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3114/200000  | Episode Reward: -6.0  | Average Reward 3.25  | Actor loss: 0.94 | Critic loss: 11.48 | Entropy loss: -0.0021  | Total Loss: 12.42 | Total Steps: 265
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3115/200000  | Episode Reward: -2.0  | Average Reward 3.19  | Actor loss: -1.31 | Critic loss: 7.00 | Entropy loss: -0.0234  | Total Loss: 5.67 | Total Steps: 122
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3116/200000  | Episode Reward: 10.0  | Average Reward 3.31  | Actor loss: 0.43 | Critic loss: 5.26 | Entropy loss: -0

Training  | Episode: 3137/200000  | Episode Reward: -46.0  | Average Reward 2.95  | Actor loss: -0.84 | Critic loss: 9.16 | Entropy loss: -0.0264  | Total Loss: 8.30 | Total Steps: 500
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3138/200000  | Episode Reward: 7.0  | Average Reward 2.98  | Actor loss: -0.29 | Critic loss: 2.83 | Entropy loss: -0.0154  | Total Loss: 2.53 | Total Steps: 118
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3139/200000  | Episode Reward: 4.0  | Average Reward 2.98  | Actor loss: -0.18 | Critic loss: 4.37 | Entropy loss: -0.0221  | Total Loss: 4.16 | Total Steps: 173
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3160/200000  | Episode Reward: 0.0  | Average Reward 2.66  | Actor loss: 0.13 | Critic loss: 4.00 | Entropy loss: -0.0034  | Total Loss: 4.13 | Total Steps: 297
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3161/200000  | Episode Reward: 10.0  | Average Reward 2.73  | Actor loss: -0.00 | Critic loss: 4.06 | Entropy loss: -0.0151  | Total Loss: 4.04 | Total Steps: 104
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3162/200000  | Episode Reward: 10.0  | Average Reward 2.81  | Actor loss: 1.25 | Critic loss: 4.91 | Entropy loss: -0.0027  | Total Loss: 6.15 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3185/200000  | Episode Reward: 1.0  | Average Reward 3.31  | Actor loss: -0.44 | Critic loss: 5.24 | Entropy loss: -0.0142  | Total Loss: 4.79 | Total Steps: 109
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3186/200000  | Episode Reward: -11.0  | Average Reward 3.21  | Actor loss: -0.59 | Critic loss: 5.49 | Entropy loss: -0.0082  | Total Loss: 4.90 | Total Steps: 365
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3187/200000  | Episode Reward: 10.0  | Average Reward 3.27  | Actor loss: 0.62 | Critic loss: 6.03 | Entropy loss: -0.0014  | Total Loss: 6.65 | T

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3234/200000  | Episode Reward: 10.0  | Average Reward 2.95  | Actor loss: 0.25 | Critic loss: 3.30 | Entropy loss: -0.0023  | Total Loss: 3.55 | Total Steps: 23
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3235/200000  | Episode Reward: 10.0  | Average Reward 3.01  | Actor loss: 0.21 | Critic loss: 3.01 | Entropy loss: -0.0011  | Total Loss: 3.22 | Total Steps: 19
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3236/200000  | Episode Reward: 3.0  | Average Reward 3.02  | Actor loss: -0.58 | Critic loss: 4.47 | Entropy loss: -0.0263  | Total Loss: 3.86 | Total Steps: 165
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3237/200000  | Episode Rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3260/200000  | Episode Reward: 10.0  | Average Reward 2.93  | Actor loss: 0.83 | Critic loss: 4.99 | Entropy loss: -0.0017  | Total Loss: 5.82 | Total Steps: 17
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3261/200000  | Episode Reward: 10.0  | Average Reward 2.98  | Actor loss: 1.28 | Critic loss: 3.71 | Entropy loss: -0.0023  | Total Loss: 4.98 | Total Steps: 19
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3262/200000  | Episode Reward: 7.0  | Average Reward 2.96  | Actor loss: -0.27 | Critic loss: 3.73 | Entropy loss: -0.0251  | Total Loss: 3.44 | Total Steps: 177
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3285/200000  | Episode Reward: 1.0  | Average Reward 2.95  | Actor loss: -0.61 | Critic loss: 6.27 | Entropy loss: -0.0101  | Total Loss: 5.65 | Total Steps: 145
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 3286/200000  | Episode Reward: -41.0  | Average Reward 2.73  | Actor loss: -0.97 | Critic loss: 9.51 | Entropy loss: -0.0288  | Total Loss: 8.52 | Total Steps: 500
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3287/200000  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3310/200000  | Episode Reward: 9.0  | Average Reward 3.19  | Actor loss: -0.07 | Critic loss: 2.38 | Entropy loss: -0.0175  | Total Loss: 2.29 | Total Steps: 165
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3311/200000  | Episode Reward: 10.0  | Average Reward 3.23  | Actor loss: 0.39 | Critic loss: 2.92 | Entropy loss: -0.0033  | Total Loss: 3.30 | Total Steps: 22
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3312/200000  | Episode Reward: 1.0  | Average Reward 3.31  | Actor loss: -0.45 | Critic loss: 7.74 | Entropy loss: -0.0141  | Total Loss: 7.28 | Total Steps: 110
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in termin

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3337/200000  | Episode Reward: -2.0  | Average Reward 3.91  | Actor loss: -0.46 | Critic loss: 6.86 | Entropy loss: -0.0118  | Total Loss: 6.39 | Total Steps: 143
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3338/200000  | Episode Reward: 4.0  | Average Reward 3.90  | Actor loss: -0.99 | Critic loss: 5.38 | Entropy loss: -0.0149  | Total Loss: 4.38 | Total Steps: 115
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3339/200000  | Episode Reward: 10.0  | Average Reward 3.92  | Actor loss: -0.41 | Critic loss: 2.66 | Entropy loss: -0.0221  | Total Loss: 2.23 | Total Steps: 222
--- target colour: green, target object: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3361/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.54 | Critic loss: 4.36 | Entropy loss: -0.0019  | Total Loss: 4.89 | Total Steps: 17
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3362/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.02 | Critic loss: 3.70 | Entropy loss: -0.0132  | Total Loss: 3.71 | Total Steps: 107
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3363/200000  | Episode Reward: 7.0  | Average Reward 4.21  | Actor loss: -0.31 | Critic loss: 3.63 | Entropy loss: -0.0096  | Total Loss: 3.31 | Total Steps: 144
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3386/200000  | Episode Reward: -8.0  | Average Reward 4.04  | Actor loss: -0.41 | Critic loss: 6.29 | Entropy loss: -0.0044  | Total Loss: 5.88 | Total Steps: 323
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3387/200000  | Episode Reward: -5.0  | Average Reward 3.96  | Actor loss: -0.93 | Critic loss: 6.38 | Entropy loss: -0.0317  | Total Loss: 5.42 | Total Steps: 184
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3388/200000  | Episode Reward: 10.0  | Average Reward 3.96  | Actor lo

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3409/200000  | Episode Reward: 2.0  | Average Reward 3.77  | Actor loss: -0.12 | Critic loss: 4.85 | Entropy loss: -0.0136  | Total Loss: 4.71 | Total Steps: 170
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3410/200000  | Episode Reward: -15.0  | Average Reward 3.69  | Actor loss: -0.40 | Critic loss: 5.49 | Entropy loss: -0.0112  | Total Loss: 5.09 | Total Steps: 434
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3411/200000  | Episode Reward: 10.0  | Average Reward 3.69  | Actor loss: 0.05 | Critic loss: 5

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3433/200000  | Episode Reward: 2.0  | Average Reward 3.71  | Actor loss: -0.36 | Critic loss: 5.28 | Entropy loss: -0.0245  | Total Loss: 4.90 | Total Steps: 160
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3434/200000  | Episode Reward: 4.0  | Average Reward 3.69  | Actor loss: -0.31 | Critic loss: 3.56 | Entropy loss: -0.0164  | Total Loss: 3.23 | Total Steps: 162
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3435/200000  | Episode Reward: 8.0  | Average Reward 3.67  | Actor loss: -0.30 | Critic loss: 4.26 | Entropy loss: -0.0183  | Total Loss: 3.94 | Total Steps: 112
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3459/200000  | Episode Reward: -3.0  | Average Reward 3.81  | Actor loss: -0.58 | Critic loss: 7.16 | Entropy loss: -0.0187  | Total Loss: 6.56 | Total Steps: 148
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3460/200000  | Episode Reward: -4.0  | Average Reward 3.74  | Actor loss: -0.47 | Critic loss: 6.98 | Entropy loss: -0.0212  | Total Loss: 6.49 | Total Steps: 172
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3461/200000  | Episode Reward: 10.0  | Average Reward 3.74  | Actor loss: 0.14 | Critic loss: 4.06 | Entropy loss: -0.0014  | Total Loss: 4.20 | Total St

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3484/200000  | Episode Reward: 7.0  | Average Reward 3.94  | Actor loss: 0.61 | Critic loss: 8.55 | Entropy loss: -0.0104  | Total Loss: 9.15 | Total Steps: 82
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3485/200000  | Episode Reward: 1.0  | Average Reward 3.94  | Actor loss: -0.32 | Critic loss: 4.23 | Entropy loss: -0.0149  | Total Loss: 3.89 | Total Steps: 176
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3486/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.52 | Critic loss: 5.03 | Entropy loss: -0.0013  | Total Loss: 5.55 | Total Steps: 16
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3509/200000  | Episode Reward: 1.0  | Average Reward 3.94  | Actor loss: -0.34 | Critic loss: 5.00 | Entropy loss: -0.0181  | Total Loss: 4.64 | Total Steps: 182
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3510/200000  | Episode Reward: 4.0  | Average Reward 3.92  | Actor loss: -0.46 | Critic loss: 4.82 | Entropy loss: -0.0098  | Total Loss: 4.34 | Total Steps: 102
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3511/200000  | Episode Reward: 1.0  | Average Reward 3.88  | Actor loss: -0.62 | Critic loss: 4.14 | Entropy loss: -0.0156  | Total Loss: 3.50 | Total Steps:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3535/200000  | Episode Reward: 4.0  | Average Reward 3.83  | Actor loss: -0.11 | Critic loss: 5.30 | Entropy loss: -0.0127  | Total Loss: 5.18 | Total Steps: 106
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3536/200000  | Episode Reward: 10.0  | Average Reward 3.87  | Actor loss: 0.10 | Critic loss: 4.01 | Entropy loss: -0.0134  | Total Loss: 4.10 | Total Steps: 108
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3537/200000  | Episode Reward: 4.0  | Average Reward 3.90  | Actor loss: -0.16 | Critic loss: 4.96 | Entropy loss: -0.0158  | Total Loss: 4.78 | Total Steps: 114
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3559/200000  | Episode Reward: 10.0  | Average Reward 4.00  | Actor loss: 0.23 | Critic loss: 5.36 | Entropy loss: -0.0005  | Total Loss: 5.60 | Total Steps: 17
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3560/200000  | Episode Reward: 10.0  | Average Reward 4.00  | Actor loss: 0.37 | Critic loss: 3.82 | Entropy loss: -0.0014  | Total Loss: 4.20 | Total Steps: 17
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3561/200000  | Episode Reward: 4.0  | Average Reward 3.97  | Actor loss: -0.32 | Critic loss: 4.40 | Entropy loss: -0.0113  | Total Loss: 4.07 | Total Steps: 115
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3562/200000  | Episode R

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3584/200000  | Episode Reward: 10.0  | Average Reward 4.03  | Actor loss: 0.04 | Critic loss: 2.99 | Entropy loss: -0.0102  | Total Loss: 3.03 | Total Steps: 109
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3585/200000  | Episode Reward: 1.0  | Average Reward 4.05  | Actor loss: -0.83 | Critic loss: 4.68 | Entropy loss: -0.0265  | Total Loss: 3.82 | Total Steps: 182
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3586/200000  | Episode Reward: -11.0  | Average Reward 4.04  | Actor loss: -0.75 | Critic loss: 7.63 | Ent

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3609/200000  | Episode Reward: -10.0  | Average Reward 4.33  | Actor loss: 0.08 | Critic loss: 2.76 | Entropy loss: -0.0017  | Total Loss: 2.84 | Total Steps: 290
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3610/200000  | Episode Reward: 10.0  | Average Reward 4.46  | Actor loss: -0.10 | Critic loss: 2.62 | Entropy loss: -0.0158  | Total Loss: 2.51 | Total Steps: 128
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3611/200000  | Episode Reward: 4.0  | Average Reward 4.43  | Actor loss: -0.04 | Critic loss: 4.27 | Entropy loss: -0.0099  | Total Loss: 4.22 | To

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3634/200000  | Episode Reward: 1.0  | Average Reward 4.65  | Actor loss: -0.26 | Critic loss: 4.15 | Entropy loss: -0.0270  | Total Loss: 3.86 | Total Steps: 183
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3635/200000  | Episode Reward: 10.0  | Average Reward 4.66  | Actor loss: -0.12 | Critic loss: 2.64 | Entropy loss: -0.0117  | Total Loss: 2.51 | Total Steps: 144
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3636/200000  | Episode Reward: 1.0  | Average Reward 4.62  | Actor loss: -0.61 | Critic loss: 6.12 | Entropy loss: -0.0131  | Total Loss: 5.49 | Total Steps: 142
--- target colour: red, target object: cylin

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3660/200000  | Episode Reward: 4.0  | Average Reward 4.61  | Actor loss: -0.61 | Critic loss: 4.81 | Entropy loss: -0.0138  | Total Loss: 4.19 | Total Steps: 106
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3661/200000  | Episode Reward: 10.0  | Average Reward 4.61  | Actor loss: 0.29 | Critic loss: 5.04 | Entropy loss: -0.0060  | Total Loss: 5.32 | Total Steps: 79
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3662/200000  | Episode Reward: -14.0  | Average Reward 4.49  | Actor loss: -0.08 | Critic loss: 1.50

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3685/200000  | Episode Reward: 10.0  | Average Reward 4.54  | Actor loss: -0.16 | Critic loss: 1.70 | Entropy loss: -0.0327  | Total Loss: 1.51 | Total Steps: 230
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3686/200000  | Episode Reward: -2.0  | Average Reward 4.47  | Actor loss: -0.64 | Critic loss: 4.51 | Entropy loss: -0.0244  | Total Loss: 3.85 | Total Steps: 211
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3687/200000  | Episode Reward: -2.0  | Average Reward 4.45  | Actor loss: -0.62 | Critic loss: 4.63 | Entropy loss: -0.0274  | Total Loss: 3.98 | Total 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3711/200000  | Episode Reward: 4.0  | Average Reward 4.77  | Actor loss: -0.32 | Critic loss: 5.98 | Entropy loss: -0.0088  | Total Loss: 5.65 | Total Steps: 103
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3712/200000  | Episode Reward: 4.0  | Average Reward 4.74  | Actor loss: -0.40 | Critic loss: 5.54 | Entropy loss: -0.0098  | Total Loss: 5.13 | Total Steps: 143
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3713/200000  | Episode Reward: 1.0  | Average Reward 4.70  | Actor loss: -0.26 | Critic loss: 5.05 | Entropy loss: -0.0144  | Total Loss: 4.77 | Total Steps: 127
--- target colour: b

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3737/200000  | Episode Reward: -2.0  | Average Reward 4.66  | Actor loss: -0.53 | Critic loss: 5.68 | Entropy loss: -0.0192  | Total Loss: 5.13 | Total Steps: 228
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3738/200000  | Episode Reward: 4.0  | Average Reward 4.68  | Actor loss: -0.10 | Critic loss: 3.75 | Entropy loss: -0.0196  | Total Loss: 3.64 | Total Steps: 169
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3739/200000  | Episode Reward: 4.0  | Average Reward 4.68  | Actor loss: -0.34 | Critic loss: 4.14 | Entropy loss: -0.0236  | Total Loss: 3.77 | Total Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3761/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: 0.70 | Critic loss: 5.34 | Entropy loss: -0.0018  | Total Loss: 6.04 | Total Steps: 16
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3762/200000  | Episode Reward: 1.0  | Average Reward 4.52  | Actor loss: -0.51 | Critic loss: 6.30 | Entropy loss: -0.0157  | Total Loss: 5.78 | Total Steps: 163
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3763/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: -0.39 | Critic loss: 3.30 | Entropy loss: -0.0227  | Total Loss: 2.89 | Total Steps: 173
--- target colour: green, target object: sphere ---
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3786/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: -0.22 | Critic loss: 5.34 | Entropy loss: -0.0140  | Total Loss: 5.10 | Total Steps: 113
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3787/200000  | Episode Reward: 4.0  | Average Reward 4.51  | Actor loss: -0.52 | Critic loss: 3.98 | Entropy loss: -0.0218  | Total Loss: 3.44 | Total Steps: 184
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3788/200000  | Episode Reward: 10.0  | Average Reward 4.53  | Actor loss: 0.17 | Critic loss: 4.82 | Entropy loss: -0.0134  | Total Loss: 4.98 | Total Steps: 75
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3811/200000  | Episode Reward: -2.0  | Average Reward 4.41  | Actor loss: -0.43 | Critic loss: 6.43 | Entropy loss: -0.0210  | Total Loss: 5.98 | Total Steps: 182
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3812/200000  | Episode Reward: 1.0  | Average Reward 4.38  | Actor loss: -0.07 | Critic loss: 4.92 | Entropy loss: -0.0153  | Total Loss: 4.84 | Total Steps: 185
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3813/200000  | Episode Reward: 4.0  | Average Reward 4.38  | Actor loss: -0.22 | Critic loss: 4.76 | Entropy loss: -0.0135  | To

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3835/200000  | Episode Reward: 10.0  | Average Reward 4.03  | Actor loss: 0.83 | Critic loss: 6.32 | Entropy loss: -0.0020  | Total Loss: 7.15 | Total Steps: 23
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3836/200000  | Episode Reward: 1.0  | Average Reward 4.03  | Actor loss: -0.37 | Critic loss: 4.65 | Entropy loss: -0.0227  | Total Loss: 4.26 | Total Steps: 181
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3837/200000  | Episode Reward: 3.0  | Average Reward 4.00  | Actor loss: -0.47 | Critic loss: 4.69 | Entropy loss: -0.0223  | Total Loss: 4.20 | Total Steps: 116
--- target colour: black, target object: cube ---
Decision Step reward: -

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3860/200000  | Episode Reward: 7.0  | Average Reward 3.90  | Actor loss: -0.25 | Critic loss: 3.64 | Entropy loss: -0.0189  | Total Loss: 3.37 | Total Steps: 159
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3861/200000  | Episode Reward: 7.0  | Average Reward 3.88  | Actor loss: -0.37 | Critic loss: 3.91 | Entropy loss: -0.0218  | Total Loss: 3.51 | Total Steps: 158
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3862/200000  | Episode Reward: 1.0  | Average Reward 3.96  | Actor loss: -0.42 | Critic loss: 6.31 | Entropy loss: -0.0223  | Total Loss: 5.87 | Total Steps: 165
--- target colour: black, target object: capsule ---
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3885/200000  | Episode Reward: 7.0  | Average Reward 3.90  | Actor loss: -0.30 | Critic loss: 2.93 | Entropy loss: -0.0164  | Total Loss: 2.62 | Total Steps: 225
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3886/200000  | Episode Reward: 4.0  | Average Reward 3.93  | Actor loss: -0.26 | Critic loss: 4.37 | Entropy loss: -0.0202  | Total Loss: 4.08 | Total Steps: 176
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3887/200000  | Episode Reward: 10.0  | Average Reward 3.99  | Actor loss: -0.01 | Critic loss: 2.46 | Entropy loss: -0.0176  | Total Loss: 2.44 | Total Steps: 127
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3910/200000  | Episode Reward: 10.0  | Average Reward 3.81  | Actor loss: -0.15 | Critic loss: 2.81 | Entropy loss: -0.0187  | Total Loss: 2.64 | Total Steps: 131
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3911/200000  | Episode Reward: 10.0  | Average Reward 3.85  | Actor loss: 1.22 | Critic loss: 5.09 | Entropy loss: -0.0037  | Total Loss: 6.30 | Total Steps: 19
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3912/200000  | Episode Reward: 4.0  | Average Reward 3.85  | Actor loss: -0.35 | Critic loss: 4.86 | Entropy loss: -0.0111  | Total Loss: 4.50 | Total Steps: 144
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3936/200000  | Episode Reward: 4.0  | Average Reward 3.82  | Actor loss: -0.39 | Critic loss: 4.60 | Entropy loss: -0.0179  | Total Loss: 4.18 | Total Steps: 110
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3937/200000  | Episode Reward: 0.0  | Average Reward 3.83  | Actor loss: -0.50 | Critic loss: 5.14 | Entropy loss: -0.0245  | Total Loss: 4.61 | Total Steps: 188
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3938/200000  | Episode Reward: 1.0  | Average Reward 3.81  | Actor loss: -0.33 | Critic loss: 4.77 | Entropy loss: -0.0199  | Total Loss: 4.42 | Total Steps: 1

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3960/200000  | Episode Reward: -2.0  | Average Reward 3.89  | Actor loss: -0.72 | Critic loss: 7.20 | Entropy loss: -0.0215  | Total Loss: 6.46 | Total Steps: 152
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3961/200000  | Episode Reward: -5.0  | Average Reward 3.81  | Actor loss: -0.40 | Critic loss: 7.88 | Entropy loss: -0.0142  | Total Loss: 7.46 | Total Steps: 144
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3962/200000  | Episode Reward: 10.0  | Average Reward 3.86  | Actor loss: 0.88 | Critic loss: 6.03 | Entropy loss: -0.0022  | Total 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3983/200000  | Episode Reward: 4.0  | Average Reward 3.65  | Actor loss: -0.26 | Critic loss: 4.83 | Entropy loss: -0.0146  | Total Loss: 4.55 | Total Steps: 115
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3984/200000  | Episode Reward: 10.0  | Average Reward 3.67  | Actor loss: 1.52 | Critic loss: 6.06 | Entropy loss: -0.0030  | Total Loss: 7.57 | Total Steps: 19
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 3985/200000  | Episode Reward: 4.0  | Average Reward 3.70  | Actor loss: -0.60 | Critic loss: 4.40 | Entropy loss: -0.0195  | Total Loss: 3.78 | Total Steps: 152
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4008/200000  | Episode Reward: 4.0  | Average Reward 3.77  | Actor loss: -0.28 | Critic loss: 4.30 | Entropy loss: -0.0199  | Total Loss: 4.00 | Total Steps: 185
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4009/200000  | Episode Reward: 4.0  | Average Reward 3.75  | Actor loss: -0.31 | Critic loss: 4.35 | Entropy loss: -0.0097  | Total Loss: 4.02 | Total Steps: 102
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4010/200000  | Episode Reward: 4.0  | Average Reward 3.73  | Actor loss: -0.42 | Critic loss: 4.26 | Entropy loss: -0.0197  | Total Loss: 3.82 | Total Steps: 158
--- target colour: yellow, target object: c

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4033/200000  | Episode Reward: -5.0  | Average Reward 4.00  | Actor loss: -0.91 | Critic loss: 5.58 | Entropy loss: -0.0253  | Total Loss: 4.65 | Total Steps: 224
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4034/200000  | Episode Reward: 10.0  | Average Reward 4.01  | Actor loss: 2.25 | Critic loss: 6.32 | Entropy loss: -0.0053  | Total Loss: 8.56 | Total Steps: 19
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4035/200000  | Episode Reward: -8.0  | Average Reward 3.92  | Actor loss: -0.73 | Cr

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4057/200000  | Episode Reward: 4.0  | Average Reward 3.91  | Actor loss: -0.18 | Critic loss: 5.21 | Entropy loss: -0.0099  | Total Loss: 5.01 | Total Steps: 111
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4058/200000  | Episode Reward: 7.0  | Average Reward 3.94  | Actor loss: 0.14 | Critic loss: 4.05 | Entropy loss: -0.0169  | Total Loss: 4.17 | Total Steps: 153
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4059/200000  | Episode Reward: 4.0  | Average Reward 3.91  | Actor loss: -0.32 | Critic loss: 5.06 | Entropy loss: -0.0128  | Total Loss: 4.72 | Total Steps: 161
--- target colour: green, target object: prism ---
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4082/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.67 | Critic loss: 4.57 | Entropy loss: -0.0258  | Total Loss: 3.87 | Total Steps: 153
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4083/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.42 | Critic loss: 4.51 | Entropy loss: -0.0177  | Total Loss: 4.07 | Total Steps: 185
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4084/200000  | Episode Reward: -2.0  | Average Reward 3.94  | Actor loss: -0.62 | Critic loss: 5.07 | Entropy loss: -0.0251  | Total Loss: 4.42 | Total St

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4108/200000  | Episode Reward: 1.0  | Average Reward 4.11  | Actor loss: -0.57 | Critic loss: 4.55 | Entropy loss: -0.0208  | Total Loss: 3.96 | Total Steps: 153
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4109/200000  | Episode Reward: 4.0  | Average Reward 4.09  | Actor loss: -0.55 | Critic loss: 5.78 | Entropy loss: -0.0167  | Total Loss: 5.22 | Total Steps: 113
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4110/200000  | Episode Reward: 10.0  | Average Reward 4.09  | Actor loss: 0.17 | Critic loss: 3.77 | Entropy loss: -0.0028  | Total Loss: 3.94 | Total Steps: 24
--- target colour: blue, target object: prism ---
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4132/200000  | Episode Reward: 1.0  | Average Reward 3.81  | Actor loss: -0.54 | Critic loss: 6.04 | Entropy loss: -0.0236  | Total Loss: 5.48 | Total Steps: 154
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4133/200000  | Episode Reward: -2.0  | Average Reward 3.76  | Actor loss: -0.91 | Critic loss: 6.23 | Entropy loss: -0.0278  | Total Loss: 5.29 | Total Steps: 230
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4134/200000  | Episode Reward: 4.0  | Average Reward 3.78  | Actor loss: -0.29 | Critic loss: 5.32 | Entropy loss: -0.0170  | Total Loss: 5.00 | Total Steps: 142
--- target colour: red, target object: cyli

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4158/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.43 | Critic loss: 7.38 | Entropy loss: -0.0080  | Total Loss: 7.81 | Total Steps: 71
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 4159/200000  | Episode Reward: -8.0  | Average Reward 4.13  | Actor loss: -1.00 | Critic loss: 7.41 | Entropy loss: -0.0314  | Total Loss: 6.39 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4160/200000  | Episode Reward: 10.0  | Average Reward 4.19  | Actor loss: 0.27 | Critic loss: 3.19 | Entropy loss: -0.0014  | Total Loss: 3.47 | Total Steps: 21
--- target colour: blue, target obje

Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4183/200000  | Episode Reward: 1.0  | Average Reward 4.55  | Actor loss: -0.51 | Critic loss: 3.93 | Entropy loss: -0.0157  | Total Loss: 3.40 | Total Steps: 385
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4184/200000  | Episode Reward: -5.0  | Average Reward 4.48  | Actor loss: -0.42 | Critic loss: 5.81 | Entropy loss: -0.0204  | Total Loss: 5.37 | Total Steps: 174
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4185/200000  | Episode Reward: 1.0  | Average Reward 4.46  | Actor loss: -0.15 | Critic loss: 5.99 | Entropy loss: -0.017

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4208/200000  | Episode Reward: 1.0  | Average Reward 4.49  | Actor loss: -0.80 | Critic loss: 5.92 | Entropy loss: -0.0146  | Total Loss: 5.10 | Total Steps: 131
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4209/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: 0.35 | Critic loss: 3.67 | Entropy loss: -0.0015  | Total Loss: 4.02 | Total Steps: 18
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4210/200000  | Episode Reward: 1.0  | Average Reward 4.50  | Actor loss: -0.47 | Critic loss: 4.42 | Entropy loss: -0.0165  | Total Loss: 3.93 | Total Steps: 227
--- target colour: black, target object: p

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4234/200000  | Episode Reward: 4.0  | Average Reward 4.70  | Actor loss: -0.75 | Critic loss: 6.14 | Entropy loss: -0.0112  | Total Loss: 5.38 | Total Steps: 102
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4235/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.07 | Critic loss: 4.83 | Entropy loss: -0.0120  | Total Loss: 4.75 | Total Steps: 106
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4236/200000  | Episode Reward: 7.0  | Average Reward 4.77  | Actor loss: -0.27 | Critic loss: 2.98 | Entropy loss: -0.0081  | Total Loss: 2.70 | Total Steps: 112
--- target colour: red, target object: cube ---
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4259/200000  | Episode Reward: 10.0  | Average Reward 4.88  | Actor loss: -0.60 | Critic loss: 3.80 | Entropy loss: -0.0263  | Total Loss: 3.17 | Total Steps: 166
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4260/200000  | Episode Reward: 10.0  | Average Reward 4.91  | Actor loss: -0.41 | Critic loss: 2.99 | Entropy loss: -0.0217  | Total Loss: 2.56 | Total Steps: 169
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4261/200000  | Episode Reward: 10.0  | Average Reward 4.91  | Actor loss: 0.42 | Critic loss: 3.66 | Entropy loss: -0.0025  | Total Loss: 4.07 | Total Steps: 19
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4262/200000  | Episode Reward: 10.0  | Average Reward 5.00  | Actor lo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4285/200000  | Episode Reward: 1.0  | Average Reward 5.00  | Actor loss: -0.43 | Critic loss: 4.48 | Entropy loss: -0.0239  | Total Loss: 4.03 | Total Steps: 217
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4286/200000  | Episode Reward: -8.0  | Average Reward 4.91  | Actor loss: -0.67 | Critic loss: 6.63 | Entropy loss: -0.0197  | Total Loss: 5.95 | Total Steps: 225
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4287/200000  | Episode Reward: 4.0  | Average Reward 4.95  | Actor loss: -0.35 |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4310/200000  | Episode Reward: 9.0  | Average Reward 4.76  | Actor loss: 0.27 | Critic loss: 3.36 | Entropy loss: -0.0262  | Total Loss: 3.61 | Total Steps: 122
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4311/200000  | Episode Reward: 7.0  | Average Reward 4.75  | Actor loss: -0.32 | Critic loss: 3.07 | Entropy loss: -0.0260  | Total Loss: 2.73 | Total Steps: 155
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4312/200000  | Episode Reward: 10.0  | Average Reward 4.85  | Actor loss: 0.33 | Critic loss: 3.33 | Entropy loss: -0.0021  | Total Loss: 3.66 | Total Steps: 19
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4313/200000  | Episode Re

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4335/200000  | Episode Reward: 7.0  | Average Reward 4.91  | Actor loss: -0.30 | Critic loss: 3.85 | Entropy loss: -0.0098  | Total Loss: 3.54 | Total Steps: 157
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4336/200000  | Episode Reward: 10.0  | Average Reward 4.94  | Actor loss: 0.56 | Critic loss: 3.39 | Entropy loss: -0.0018  | Total Loss: 3.95 | Total Steps: 17
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4337/200000  | Episode Reward: 2.0  | Average Reward 4.90  | Actor loss: -0.49 | Critic loss: 7.07 | Entropy loss: -0.0196  | Total Loss: 6.56 | Total Steps: 105
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4361/200000  | Episode Reward: 10.0  | Average Reward 4.88  | Actor loss: 0.17 | Critic loss: 6.95 | Entropy loss: -0.0063  | Total Loss: 7.11 | Total Steps: 61
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4362/200000  | Episode Reward: 3.0  | Average Reward 4.88  | Actor loss: -0.34 | Critic loss: 6.19 | Entropy loss: -0.0198  | Total Loss: 5.84 | Total Steps: 108
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4363/200000  | Episode Reward: 7.0  | Average Reward 4.86  | Actor loss: -0.44 | Critic loss: 3.57 | Entropy loss: -0.0275  | Total Loss: 3.10 | Total Steps: 228
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episo

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4385/200000  | Episode Reward: 0.0  | Average Reward 4.75  | Actor loss: -0.56 | Critic loss: 5.72 | Entropy loss: -0.0195  | Total Loss: 5.14 | Total Steps: 139
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4386/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0.22 | Critic loss: 3.24 | Entropy loss: -0.0011  | Total Loss: 3.46 | Total Steps: 19
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4387/200000  | Episode Reward: -12.0  | Average Reward 4.70  | Actor loss: -0.97 | Critic loss: 5.02 | Entropy loss: -0.0113  | Total Loss: 4.04 | Tota

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4410/200000  | Episode Reward: 10.0  | Average Reward 4.70  | Actor loss: 0.21 | Critic loss: 3.43 | Entropy loss: -0.0164  | Total Loss: 3.61 | Total Steps: 106
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4411/200000  | Episode Reward: 10.0  | Average Reward 4.73  | Actor loss: 0.39 | Critic loss: 3.79 | Entropy loss: -0.0016  | Total Loss: 4.17 | Total Steps: 17
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4412/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 1.95 | Critic loss: 6.31 | Entropy loss: -0.0055  | Total Loss: 8.26 | Total Steps: 21
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4413/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4437/200000  | Episode Reward: 3.0  | Average Reward 4.76  | Actor loss: -0.64 | Critic loss: 5.36 | Entropy loss: -0.0177  | Total Loss: 4.70 | Total Steps: 141
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4438/200000  | Episode Reward: -5.0  | Average Reward 4.75  | Actor loss: 0.07 | Critic loss: 4.49 | Entropy loss: -0.0003  | Total Loss: 4.56 | Total Steps: 273
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4439/200000  | Episode Reward: 1.0  | Average Reward 4.73  | Actor loss: -0.43 | Critic loss: 5.00 | E

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4461/200000  | Episode Reward: -8.0  | Average Reward 4.57  | Actor loss: -0.51 | Critic loss: 8.44 | Entropy loss: -0.0181  | Total Loss: 7.92 | Total Steps: 240
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4462/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.75 | Critic loss: 6.25 | Entropy loss: -0.0308  | Total Loss: 5.47 | Total Steps: 241
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4463/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: 0.52 | Critic loss: 4.19 | Entropy loss: -0.0017  | Total Loss: 4.71 | Total Steps: 17
--- target colour: blue, target object: c

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4487/200000  | Episode Reward: -2.0  | Average Reward 4.64  | Actor loss: -0.55 | Critic loss: 6.12 | Entropy loss: -0.0152  | Total Loss: 5.55 | Total Steps: 162
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4488/200000  | Episode Reward: 4.0  | Average Reward 4.61  | Actor loss: -0.28 | Critic loss: 4.78 | Entropy loss: -0.0251  | Total Loss: 4.48 | Total Steps: 151
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4489/200000  | Episode Reward: 4.0  | Average Reward 4.58  | Actor loss: -0.43 | Critic loss: 3.70 | Entropy loss: -0.0209  | Total Loss: 3.25 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4512/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: 0.53 | Critic loss: 4.67 | Entropy loss: -0.0013  | Total Loss: 5.20 | Total Steps: 16
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4513/200000  | Episode Reward: 0.0  | Average Reward 4.48  | Actor loss: -0.36 | Critic loss: 5.12 | Entropy loss: -0.0387  | Total Loss: 4.73 | Total Steps: 165
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4514/200000  | Episode Reward: 10.0  | Average Reward 4.53  | Actor loss: 0.12 | Critic loss: 3.81 | Entropy loss: -0.0014  | Total Loss: 3.93 | Total Steps: 23
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4537/200000  | Episode Reward: 1.0  | Average Reward 4.46  | Actor loss: -0.59 | Critic loss: 5.41 | Entropy loss: -0.0267  | Total Loss: 4.79 | Total Steps: 142
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4538/200000  | Episode Reward: 4.0  | Average Reward 4.46  | Actor loss: -0.46 | Critic loss: 5.05 | Entropy loss: -0.0182  | Total Loss: 4.57 | Total Steps: 119
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4539/200000  | Episode Reward: 10.0  | Average Reward 4.48  | Actor loss: 0.34 | Critic loss: 3.44 | Entropy loss: -0.0030  | Total Loss: 3.77 | Total Steps: 31
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Ep

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4562/200000  | Episode Reward: -2.0  | Average Reward 4.25  | Actor loss: -1.07 | Critic loss: 5.97 | Entropy loss: -0.0256  | Total Loss: 4.87 | Total Steps: 155
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4563/200000  | Episode Reward: 10.0  | Average Reward 4.26  | Actor loss: 0.49 | Critic loss: 5.35 | Entropy loss: -0.0014  | Total Loss: 5.85 | Total Steps: 16
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4564/200000  | Episode Reward: 10.0  | Average Reward 4.26  | Actor loss: 0.09 | Critic loss: 4.64 | Entropy loss: -0.0229  | Total Loss: 4.70 | Total Steps: 97
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4586/200000  | Episode Reward: 4.0  | Average Reward 4.09  | Actor loss: -0.23 | Critic loss: 4.60 | Entropy loss: -0.0186  | Total Loss: 4.35 | Total Steps: 134
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4587/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.02 | Critic loss: 3.17 | Entropy loss: -0.0151  | Total Loss: 3.18 | Total Steps: 116
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4588/200000  | Episode Reward: 10.0  | Average Reward 4.25  | Actor loss: 0.63 | Critic loss: 6.49 | Entropy loss: -0.0013  | Total Loss: 7.11 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4589/200000  | Episo

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4612/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.43 | Critic loss: 3.34 | Entropy loss: -0.0295  | Total Loss: 2.87 | Total Steps: 245
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4613/200000  | Episode Reward: -2.0  | Average Reward 4.16  | Actor loss: -0.53 | Critic loss: 5.42 | Entropy loss: -0.0158  | Total Loss: 4.87 | Total Steps: 172
--- target colour: blue, target object: prism ---
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4614/200000  | Episode Reward: 10.0  | Average Reward 4.18  | Actor loss: 0.98 | Critic loss: 11.74 | Entropy loss: -0.0020  | Total Loss: 12.72 | Total Steps: 281
--- target colour: black, target

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4637/200000  | Episode Reward: -2.0  | Average Reward 4.02  | Actor loss: -0.43 | Critic loss: 5.82 | Entropy loss: -0.0160  | Total Loss: 5.38 | Total Steps: 173
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4638/200000  | Episode Reward: 4.0  | Average Reward 4.07  | Actor loss: -0.34 | Critic loss: 4.59 | Entropy loss: -0.0170  | Total Loss: 4.23 | Total Steps: 174
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4639/200000  | Episode Reward: 10.0  | Average Reward 4.11  | Actor loss: 1.14 | Critic loss: 4.84 | Entropy loss: -0.0026  | Total Loss: 5.98 | Total Steps: 17
--- target colour: red, target object: cylinder 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4662/200000  | Episode Reward: 4.0  | Average Reward 4.05  | Actor loss: -0.36 | Critic loss: 4.64 | Entropy loss: -0.0136  | Total Loss: 4.27 | Total Steps: 169
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4663/200000  | Episode Reward: -8.0  | Average Reward 3.96  | Actor loss: -0.80 | Critic loss: 8.28 | Entropy loss: -0.0203  | Total Loss: 7.46 | Total Steps: 181
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4664/200000  | Episode Reward: 10.0  | Average Reward 3.96  | Actor loss: 0.03 | Critic loss: 4.17 | Entropy loss: -0.0097  | Total Loss: 4.19 | Total Steps: 77

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4686/200000  | Episode Reward: 4.0  | Average Reward 3.67  | Actor loss: -0.69 | Critic loss: 4.35 | Entropy loss: -0.0226  | Total Loss: 3.63 | Total Steps: 126
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4687/200000  | Episode Reward: -5.0  | Average Reward 3.65  | Actor loss: -0.85 | Critic loss: 5.42 | Entropy loss: -0.0296  | Total Loss: 4.55 | Total Steps: 235
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4688/200000  | Episode Reward: 1.0  | Average Reward 3.63  | Actor loss: -0.44 | Critic loss: 5.29 | Entropy 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4712/200000  | Episode Reward: 4.0  | Average Reward 3.83  | Actor loss: -0.47 | Critic loss: 6.61 | Entropy loss: -0.0113  | Total Loss: 6.13 | Total Steps: 101
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4713/200000  | Episode Reward: 10.0  | Average Reward 3.88  | Actor loss: 1.44 | Critic loss: 4.28 | Entropy loss: -0.0039  | Total Loss: 5.71 | Total Steps: 20
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4714/200000  | Episode Reward: 10.0  | Average Reward 3.88  | Actor loss: 0.97 | Critic loss: 3.14 | Entropy loss: -0.0035  | Total Loss: 4.10 | Total Steps: 18
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4715/200000  | Episode 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4738/200000  | Episode Reward: -5.0  | Average Reward 3.97  | Actor loss: -0.53 | Critic loss: 6.59 | Entropy loss: -0.0171  | Total Loss: 6.05 | Total Steps: 170
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4739/200000  | Episode Reward: 10.0  | Average Reward 3.97  | Actor loss: 0.37 | Critic loss: 3.98 | Entropy loss: -0.0016  | Total Loss: 4.35 | Total Steps: 18
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4740/200000  | Episode Reward: 5.0  | Average Reward 3.94  | Actor loss: -0.25 | Critic loss: 3.86 | Entropy loss: -0.0224  | Total Loss: 3.59 | Total Steps: 149
--- target colour: green, target object: pri

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4764/200000  | Episode Reward: 10.0  | Average Reward 4.25  | Actor loss: 0.44 | Critic loss: 4.10 | Entropy loss: -0.0013  | Total Loss: 4.53 | Total Steps: 16
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4765/200000  | Episode Reward: 1.0  | Average Reward 4.21  | Actor loss: -0.99 | Critic loss: 6.02 | Entropy loss: -0.0325  | Total Loss: 4.99 | Total Steps: 196
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4766/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.32 | Critic loss: 4.44 | Entropy loss: -0.0159  | Total Loss: 4.10 | Total Steps: 179
--- target colour: green, target object: capsule ---
Decision Step reward: -

Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4788/200000  | Episode Reward: 10.0  | Average Reward 4.12  | Actor loss: -0.31 | Critic loss: 2.90 | Entropy loss: -0.0161  | Total Loss: 2.57 | Total Steps: 410
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4789/200000  | Episode Reward: -2.0  | Average Reward 4.05  | Actor loss: -0.37 | Critic loss: 4.64 | Entropy loss: -0.0255  | Total Loss: 4.24 | Total Steps: 238
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4790/200000  | Episode Reward: 4.0  | Average Reward 4.07  | Actor loss: -0.03 | Critic loss: 5.03 | Entropy loss: -0.0143  | Total Loss: 4.98 | Total Steps: 103
--- target colour: green, target ob

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4814/200000  | Episode Reward: -5.0  | Average Reward 4.02  | Actor loss: -0.66 | Critic loss: 6.05 | Entropy loss: -0.0216  | Total Loss: 5.36 | Total Steps: 219
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4815/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: 1.14 | Critic loss: 3.88 | Entropy loss: -0.0028  | Total Loss: 5.02 | Total Steps: 19
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4816/200000  | Episode Reward: 10.0  | Average Reward 4.11  | Actor loss: -0.16 | Critic loss: 4.08 | Entropy loss: -0.0097  | Total Loss: 3.91 | Total Steps: 101
--- target colour: yellow, target object: cube ---
Agent in terminal step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4840/200000  | Episode Reward: 7.0  | Average Reward 4.18  | Actor loss: 0.02 | Critic loss: 5.96 | Entropy loss: -0.0095  | Total Loss: 5.97 | Total Steps: 95
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 4841/200000  | Episode Reward: -31.0  | Average Reward 4.02  | Actor loss: -1.01 | Critic loss: 7.89 | Entropy loss: -0.0336  | Total Loss: 6.86 | Total Steps: 500
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4842/200000  | Episode Reward: 10.0  | Average Reward 4.08  | Actor loss: 0.70 | Critic loss: 2.91 | Entropy loss: -0.0046  | Total Loss: 3.61 | Total Steps: 27
--- t

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4863/200000  | Episode Reward: 10.0  | Average Reward 3.98  | Actor loss: 1.40 | Critic loss: 5.33 | Entropy loss: -0.0037  | Total Loss: 6.72 | Total Steps: 19
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4864/200000  | Episode Reward: 7.0  | Average Reward 3.96  | Actor loss: -0.32 | Critic loss: 4.38 | Entropy loss: -0.0143  | Total Loss: 4.04 | Total Steps: 170
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4865/200000  | Episode Reward: 7.0  | Average Reward 3.98  | Actor loss: -0.36 | Critic loss: 5.03 | Entropy loss: -0.0246  | Total Loss: 4.64 | Total Steps: 119
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4888/200000  | Episode Reward: 10.0  | Average Reward 4.16  | Actor loss: 0.18 | Critic loss: 7.04 | Entropy loss: -0.0067  | Total Loss: 7.21 | Total Steps: 70
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4889/200000  | Episode Reward: 7.0  | Average Reward 4.14  | Actor loss: -0.43 | Critic loss: 3.13 | Entropy loss: -0.0208  | Total Loss: 2.68 | Total Steps: 150
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4890/200000  | Episode Reward: 10.0  | Average Reward 4.14  | Actor loss: -0.01 | Critic loss: 2.96 | Entropy loss: -0.0122  | Total Loss: 2.93 | Total Steps: 115
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4891/200000  | Episode Reward: 10.0  | Aver

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4913/200000  | Episode Reward: 10.0  | Average Reward 4.00  | Actor loss: 0.22 | Critic loss: 3.11 | Entropy loss: -0.0015  | Total Loss: 3.33 | Total Steps: 19
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4914/200000  | Episode Reward: 1.0  | Average Reward 3.95  | Actor loss: -0.07 | Critic loss: 4.50 | Entropy loss: -0.0222  | Total Loss: 4.41 | Total Steps: 174
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4915/200000  | Episode Reward: 10.0  | Average Reward 3.95  | Actor loss: 0.08 | Critic loss: 3.53 | Entropy loss: -0.0012  | Total Loss: 3.61 | Total Steps: 23
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4939/200000  | Episode Reward: 4.0  | Average Reward 4.10  | Actor loss: -0.24 | Critic loss: 5.93 | Entropy loss: -0.0103  | Total Loss: 5.69 | Total Steps: 102
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4940/200000  | Episode Reward: 7.0  | Average Reward 4.11  | Actor loss: -0.38 | Critic loss: 3.86 | Entropy loss: -0.0247  | Total Loss: 3.46 | Total Steps: 184
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4941/200000  | Episode Reward: 10.0  | Average Reward 4.11  | Actor loss: 0.40 | Critic loss: 2.87 | Entropy loss: -0.0016  | Total Loss: 3.26 | Total Steps: 17
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4964/200000  | Episode Reward: 10.0  | Average Reward 3.83  | Actor loss: 0.55 | Critic loss: 3.96 | Entropy loss: -0.0017  | Total Loss: 4.51 | Total Steps: 17
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4965/200000  | Episode Reward: 4.0  | Average Reward 3.84  | Actor loss: -0.25 | Critic loss: 6.74 | Entropy loss: -0.0093  | Total Loss: 6.48 | Total Steps: 102
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4966/200000  | Episode Reward: 10.0  | Average Reward 3.87  | Actor loss: 0.10 | Critic loss: 4.97 | Entropy loss: -0.0008  | Total Loss: 5.07 | Total Steps: 21
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
D

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4991/200000  | Episode Reward: 10.0  | Average Reward 4.53  | Actor loss: -0.25 | Critic loss: 2.69 | Entropy loss: -0.0086  | Total Loss: 2.43 | Total Steps: 109
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4992/200000  | Episode Reward: 10.0  | Average Reward 4.62  | Actor loss: -0.21 | Critic loss: 3.11 | Entropy loss: -0.0150  | Total Loss: 2.88 | Total Steps: 131
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 4993/200000  | Episode Reward: 4.0  | Average Reward 4.61  | Actor loss: -0.59 | Critic loss: 5.07 | Entropy loss: -0.0126  | Total Loss: 4.46 | Total Steps: 143
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5016/200000  | Episode Reward: 7.0  | Average Reward 4.43  | Actor loss: -0.46 | Critic loss: 3.63 | Entropy loss: -0.0293  | Total Loss: 3.14 | Total Steps: 147
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5017/200000  | Episode Reward: 10.0  | Average Reward 4.43  | Actor loss: 1.10 | Critic loss: 3.55 | Entropy loss: -0.0031  | Total Loss: 4.65 | Total Steps: 19
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5018/200000  | Episode Reward: 10.0  | Average Reward 4.46  | Actor loss: 0.47 | Critic loss: 2.85 | Entropy loss: -0.0019  | Total Loss: 3.32 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5042/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.75 | Critic loss: 5.73 | Entropy loss: -0.0238  | Total Loss: 4.96 | Total Steps: 168
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5043/200000  | Episode Reward: 4.0  | Average Reward 4.59  | Actor loss: -0.64 | Critic loss: 4.37 | Entropy loss: -0.0197  | Total Loss: 3.71 | Total Steps: 168
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5044/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.56 | Critic loss: 6.27 | Entropy loss: -0.0158  | Total Loss: 5.70 | Total Steps: 144
--- target colour: black, target object: cube

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5067/200000  | Episode Reward: 10.0  | Average Reward 4.70  | Actor loss: 0.30 | Critic loss: 2.44 | Entropy loss: -0.0016  | Total Loss: 2.74 | Total Steps: 18
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5068/200000  | Episode Reward: -11.0  | Average Reward 4.65  | Actor loss: 0.01 | Critic loss: 2.90 | Entropy loss: -0.0029  | Total Loss: 2.90 | Total Steps: 293
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5069/200000  | Episode Reward: 10.0  | Average Reward 4.65  | Actor loss: 0.29 | Critic loss: 5.65 | Entropy loss: -0.0162  | Total Loss: 5.92 | Total Steps: 87
--- target col

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5093/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.46 | Critic loss: 2.83 | Entropy loss: -0.0018  | Total Loss: 3.29 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5094/200000  | Episode Reward: 7.0  | Average Reward 4.81  | Actor loss: -0.29 | Critic loss: 5.96 | Entropy loss: -0.0107  | Total Loss: 5.66 | Total Steps: 95
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5095/200000  | Episode Reward: 10.0  | Average Reward 4.84  | Actor loss: 0.47 | Critic loss: 3.00 | Entropy loss: -0.0017  | Total Loss: 3.46 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Max Step Reward: -10
Step: 500
Training  | Episode: 5117/200000  | Episode Reward: -46.0  | Average Reward 4.40  | Actor loss: -0.82 | Critic loss: 12.37 | Entropy loss: -0.0211  | Total Loss: 11.53 | Total Steps: 500
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 5118/200000  | Episode Reward: -35.0  | Average Reward 4.21  | Actor loss: -0.59 | Critic loss: 7.63 | Entropy loss: -0.0189  | Total Loss: 7.02 | Total Steps: 500
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5119/200000  | Episode Reward: 4.0  | Average Reward 4.24  | Actor loss: 0.05 | Critic loss: 2.85 | Entropy loss: -0.0242  | Total 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5143/200000  | Episode Reward: 10.0  | Average Reward 4.25  | Actor loss: 0.30 | Critic loss: 2.62 | Entropy loss: -0.0118  | Total Loss: 2.90 | Total Steps: 103
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5144/200000  | Episode Reward: 10.0  | Average Reward 4.30  | Actor loss: 0.53 | Critic loss: 3.60 | Entropy loss: -0.0014  | Total Loss: 4.12 | Total Steps: 16
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5145/200000  | Episode Reward: 4.0  | Average Reward 4.30  | Actor loss: -0.35 | Critic loss: 4.80 | Entropy loss: -0.0111  | Total Loss: 4.43 | Total Steps: 113
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5169/200000  | Episode Reward: 1.0  | Average Reward 4.55  | Actor loss: -0.79 | Critic loss: 5.64 | Entropy loss: -0.0186  | Total Loss: 4.83 | Total Steps: 138
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5170/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.26 | Critic loss: 2.17 | Entropy loss: -0.0015  | Total Loss: 2.43 | Total Steps: 18
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5171/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: -0.26 | Critic loss: 2.66 | Entropy loss: -0.0191  | Total Loss: 2.38 | Total Steps: 114
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5195/200000  | Episode Reward: 4.0  | Average Reward 4.34  | Actor loss: -0.31 | Critic loss: 4.83 | Entropy loss: -0.0110  | Total Loss: 4.50 | Total Steps: 149
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5196/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.15 | Critic loss: 2.00 | Entropy loss: -0.0013  | Total Loss: 2.14 | Total Steps: 19
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5197/200000  | Episode Reward: 9.0  | Average Reward 4.37  | Actor loss: -0.32 | Critic loss: 2.60 | Entropy loss: -0.0124  | Total Loss: 2.26 | Total Steps: 138
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step r

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5221/200000  | Episode Reward: 4.0  | Average Reward 4.39  | Actor loss: -0.28 | Critic loss: 5.71 | Entropy loss: -0.0117  | Total Loss: 5.42 | Total Steps: 103
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5222/200000  | Episode Reward: 1.0  | Average Reward 4.39  | Actor loss: -0.49 | Critic loss: 4.98 | Entropy loss: -0.0130  | Total Loss: 4.48 | Total Steps: 148
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5223/200000  | Episode Reward: 4.0  | Average Reward 4.36  | Actor loss: -0.30 | Critic loss: 5.63 | Entropy loss: -0.0163  | Total Loss: 5.31 | Total Steps: 109
--- target colour

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5247/200000  | Episode Reward: 1.0  | Average Reward 4.30  | Actor loss: -0.66 | Critic loss: 5.77 | Entropy loss: -0.0133  | Total Loss: 5.09 | Total Steps: 155
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5248/200000  | Episode Reward: 4.0  | Average Reward 4.30  | Actor loss: -0.33 | Critic loss: 6.34 | Entropy loss: -0.0189  | Total Loss: 5.99 | Total Steps: 109
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5249/200000  | Episode Reward: 4.0  | Average Reward 4.28  | Actor loss: -0.18 | Critic loss: 4.72 | Entropy loss: -0.0209  | Total Loss: 4.51 | Total Steps: 127
--- target colo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5273/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: 0.21 | Critic loss: 4.26 | Entropy loss: -0.0130  | Total Loss: 4.45 | Total Steps: 101
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5274/200000  | Episode Reward: 7.0  | Average Reward 4.74  | Actor loss: -0.21 | Critic loss: 4.22 | Entropy loss: -0.0134  | Total Loss: 3.99 | Total Steps: 158
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5275/200000  | Episode Reward: 10.0  | Average Reward 4.74  | Actor loss: 0.58 | Critic loss: 3.27 | Entropy loss: -0.0019  | Total Loss: 3.84 | Total Steps: 17
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5299/200000  | Episode Reward: 10.0  | Average Reward 4.60  | Actor loss: -0.15 | Critic loss: 3.60 | Entropy loss: -0.0114  | Total Loss: 3.44 | Total Steps: 106
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5300/200000  | Episode Reward: 1.0  | Average Reward 4.58  | Actor loss: -0.31 | Critic loss: 6.03 | Entropy loss: -0.0177  | Total Loss: 5.70 | Total Steps: 153
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5301/200000  | Episode Reward: 7.0  | Average Reward 4.58  | Actor loss: -0.15 | Critic loss: 4.34 | Entropy loss: -0.0116  | Total Loss: 4.18 | Total Steps: 147
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.48. Model has been saved-----
Training  | Episode: 5324/200000  | Episode Reward: 10.0  | Average Reward 5.48  | Actor loss: 1.40 | Critic loss: 4.63 | Entropy loss: -0.0048  | Total Loss: 6.03 | Total Steps: 24
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5325/200000  | Episode Reward: 10.0  | Average Reward 5.48  | Actor loss: 0.36 | Critic loss: 2.51 | Entropy loss: -0.0019  | Total Loss: 2.87 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5326/200000  | Episode Reward: 4.0  | Average Reward 5.46  | Actor loss: -0.73 | Critic loss: 6.72 | Entropy loss: -0.0152  | Total Loss: 5.97 | Total Steps: 102
--- target colour: black, target object: cylinder -

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5348/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.19 | Critic loss: 2.81 | Entropy loss: -0.0006  | Total Loss: 3.00 | Total Steps: 17
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5349/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.24 | Critic loss: 3.15 | Entropy loss: -0.0011  | Total Loss: 3.39 | Total Steps: 17
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5350/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.65 | Critic loss: 2.62 | Entropy loss: -0.0019  | Total Loss: 3.27 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5351/200000  | Episode Reward: 7.0  | Average Rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5374/200000  | Episode Reward: 10.0  | Average Reward 5.00  | Actor loss: 0.27 | Critic loss: 2.66 | Entropy loss: -0.0014  | Total Loss: 2.93 | Total Steps: 18
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5375/200000  | Episode Reward: 7.0  | Average Reward 5.01  | Actor loss: -0.52 | Critic loss: 5.28 | Entropy loss: -0.0134  | Total Loss: 4.75 | Total Steps: 131
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5376/200000  | Episode Reward: 10.0  | Average Reward 5.01  | Actor loss: 0.11 | Critic loss: 3.39 | Entropy loss: -0.0224  | Total Loss: 3.48 | Total Steps: 124
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5400/200000  | Episode Reward: 10.0  | Average Reward 5.05  | Actor loss: 0.38 | Critic loss: 3.58 | Entropy loss: -0.0013  | Total Loss: 3.96 | Total Steps: 17
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5401/200000  | Episode Reward: 6.0  | Average Reward 5.06  | Actor loss: -0.46 | Critic loss: 4.37 | Entropy loss: -0.0261  | Total Loss: 3.88 | Total Steps: 164
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5402/200000  | Episode Reward: -5.0  | Average Reward 4.99  | Actor loss: -1.13 | Critic loss: 8.12 | Entropy loss: -0.0190  | Total Loss: 6.97 | Total Steps: 171
--- target colour: red, target object: pri

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 5424/200000  | Episode Reward: -40.0  | Average Reward 4.79  | Actor loss: -0.75 | Critic loss: 12.19 | Entropy loss: -0.0202  | Total Loss: 11.42 | Total Steps: 500
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5425/200000  | Episode Reward: 10.0  | Average Reward 4.86  | Actor loss: -0.06 | Critic loss: 2.63 | Entropy loss: -0.0153  | Total Loss: 2.55 | Total Steps: 151
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5426/200000  | Episode Reward: 10.0  | Average Reward 4.86  | Actor loss: 0.25 | Critic loss: 3.05 | Entropy loss: -0.0015  | Total Loss: 3.30 | Total Steps: 19
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | E

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5449/200000  | Episode Reward: 7.0  | Average Reward 4.71  | Actor loss: 0.16 | Critic loss: 7.30 | Entropy loss: -0.0056  | Total Loss: 7.45 | Total Steps: 79
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5450/200000  | Episode Reward: 4.0  | Average Reward 4.71  | Actor loss: -0.36 | Critic loss: 7.02 | Entropy loss: -0.0064  | Total Loss: 6.65 | Total Steps: 95
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5451/200000  | Episode Reward: 4.0  | Average Reward 4.68  | Actor loss: -0.19 | Critic loss: 6.04 | Entropy loss: -0.0179  | Total Loss: 5.83 | Total Steps: 107
--- target colour: red, target object: prism ---
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5474/200000  | Episode Reward: 10.0  | Average Reward 4.46  | Actor loss: -0.10 | Critic loss: 3.65 | Entropy loss: -0.0142  | Total Loss: 3.54 | Total Steps: 107
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 5475/200000  | Episode Reward: -35.0  | Average Reward 4.23  | Actor loss: -0.83 | Critic loss: 11.10 | Entropy loss: -0.0205  | Total Loss: 10.25 | Total Steps: 500
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5476/200000  | Episode Reward: 4.0  | Average Reward 4.24  | Actor loss: 0.02 | Critic loss: 4.40 | Entropy loss:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5497/200000  | Episode Reward: 6.0  | Average Reward 3.90  | Actor loss: 0.08 | Critic loss: 4.23 | Entropy loss: -0.0188  | Total Loss: 4.29 | Total Steps: 144
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5498/200000  | Episode Reward: 10.0  | Average Reward 3.94  | Actor loss: 0.13 | Critic loss: 4.12 | Entropy loss: -0.0116  | Total Loss: 4.24 | Total Steps: 106
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5499/200000  | Episode Reward: 10.0  | Average Reward 3.94  | Actor loss: 0.53 | Critic loss: 4.29 | Entropy loss: -0.0015  | Total Loss: 4.83 | Total Steps: 17
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5500/200000  | Episode Reward: 10.0  | Average Rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5522/200000  | Episode Reward: 1.0  | Average Reward 3.71  | Actor loss: -0.47 | Critic loss: 3.78 | Entropy loss: -0.0261  | Total Loss: 3.29 | Total Steps: 245
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5523/200000  | Episode Reward: 9.0  | Average Reward 3.75  | Actor loss: -0.14 | Critic loss: 3.33 | Entropy loss: -0.0138  | Total Loss: 3.17 | Total Steps: 111
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5524/200000  | Episode Reward: 4.0  | Average Reward 3.72  | Actor loss: -0.58 | Critic loss: 6.02 | Entropy loss: -0.0102  | Total Loss: 5.43 | Total Steps: 97
--- target colour: green, target object: cylinder ---
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5547/200000  | Episode Reward: 10.0  | Average Reward 4.03  | Actor loss: 0.44 | Critic loss: 3.55 | Entropy loss: -0.0014  | Total Loss: 3.99 | Total Steps: 17
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5548/200000  | Episode Reward: 10.0  | Average Reward 4.03  | Actor loss: 0.64 | Critic loss: 3.80 | Entropy loss: -0.0042  | Total Loss: 4.44 | Total Steps: 18
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5549/200000  | Episode Reward: 1.0  | Average Reward 3.98  | Actor loss: -0.93 | Critic loss: 3.92 | Entropy loss: -0.0406  | Total Loss: 2.95 | Total Steps: 242
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Te

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5572/200000  | Episode Reward: 10.0  | Average Reward 3.76  | Actor loss: -0.16 | Critic loss: 3.32 | Entropy loss: -0.0193  | Total Loss: 3.13 | Total Steps: 143
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5573/200000  | Episode Reward: 10.0  | Average Reward 3.76  | Actor loss: 0.10 | Critic loss: 3.56 | Entropy loss: -0.0020  | Total Loss: 3.66 | Total Steps: 23
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5574/200000  | Episode Reward: 10.0  | Average Reward 3.76  | Actor loss: -0.02 | Critic loss: 3.82 | Entropy loss: -0.0096  | Total Loss: 3.79 | Total Steps: 107
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5598/200000  | Episode Reward: 4.0  | Average Reward 3.69  | Actor loss: -0.30 | Critic loss: 4.78 | Entropy loss: -0.0180  | Total Loss: 4.47 | Total Steps: 181
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5599/200000  | Episode Reward: -2.0  | Average Reward 3.67  | Actor loss: -0.19 | Critic loss: 5.83 | Entropy loss: -0.0154  | Total Loss: 5.62 | Total Steps: 175
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5600/200000  | Episode Reward: 7.0  | Average Reward 3.65  | Actor loss: 0.06 | Critic loss: 3.90 | Entropy loss: -0.0090  | Total Loss: 3.95 | Total Steps: 112
--- target colour: re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5623/200000  | Episode Reward: 10.0  | Average Reward 3.85  | Actor loss: -0.15 | Critic loss: 2.61 | Entropy loss: -0.0176  | Total Loss: 2.44 | Total Steps: 121
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5624/200000  | Episode Reward: 1.0  | Average Reward 4.05  | Actor loss: -0.26 | Critic loss: 5.12 | Entropy loss: -0.0150  | Total Loss: 4.84 | Total Steps: 140
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5625/200000  | Episode Reward: 1.0  | Average Reward 4.01  | Actor loss: 0.84 | Critic loss: 7.69 | Entropy loss: -0.0012  | Total Loss: 8.52 | Total Steps: 268
--- target colour: black, target ob

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5647/200000  | Episode Reward: 10.0  | Average Reward 3.80  | Actor loss: -0.09 | Critic loss: 3.72 | Entropy loss: -0.0120  | Total Loss: 3.62 | Total Steps: 100
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5648/200000  | Episode Reward: 4.0  | Average Reward 3.81  | Actor loss: -0.65 | Critic loss: 5.88 | Entropy loss: -0.0161  | Total Loss: 5.22 | Total Steps: 104
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5649/200000  | Episode Reward: 10.0  | Average Reward 3.83  | Actor loss: -0.26 | Critic loss: 2.65 | Entropy loss: -0.0157  | Total Loss: 2.37 | Total Steps: 121
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5672/200000  | Episode Reward: 4.0  | Average Reward 3.74  | Actor loss: -0.28 | Critic loss: 4.46 | Entropy loss: -0.0120  | Total Loss: 4.17 | Total Steps: 112
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5673/200000  | Episode Reward: -5.0  | Average Reward 3.69  | Actor loss: -0.49 | Critic loss: 5.46 | Entropy loss: -0.0211  | Total Loss: 4.95 | Total Steps: 198
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5674/200000  | Episode Reward: 1.0  | Average Reward 3.65  | Actor loss: -0.59 | Critic loss: 4.75 | Entropy l

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5696/200000  | Episode Reward: 1.0  | Average Reward 4.08  | Actor loss: -0.59 | Critic loss: 4.73 | Entropy loss: -0.0218  | Total Loss: 4.11 | Total Steps: 135
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5697/200000  | Episode Reward: 4.0  | Average Reward 4.07  | Actor loss: -0.22 | Critic loss: 7.38 | Entropy loss: -0.0058  | Total Loss: 7.16 | Total Steps: 78
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5698/200000  | Episode Reward: 10.0  | Average Reward 4.07  | Actor loss: -0.24 | Critic loss: 4.81 | Entropy loss: -0.0056  | Total Loss: 4.57 | Total Steps: 75
--- target colour: red, target object: cylinder ---
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5722/200000  | Episode Reward: 7.0  | Average Reward 4.28  | Actor loss: -0.23 | Critic loss: 2.84 | Entropy loss: -0.0262  | Total Loss: 2.59 | Total Steps: 242
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5723/200000  | Episode Reward: 1.0  | Average Reward 4.24  | Actor loss: -0.41 | Critic loss: 5.53 | Entropy loss: -0.0171  | Total Loss: 5.10 | Total Steps: 166
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5724/200000  | Episode Reward: 1.0  | Average Reward 4.22  | Actor loss: -0.23 | Critic loss: 5.40 | Entropy loss: -0.0211  | Total Loss: 5.16 | Total Steps: 174
--- target col

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5747/200000  | Episode Reward: 4.0  | Average Reward 4.12  | Actor loss: -0.49 | Critic loss: 3.80 | Entropy loss: -0.0139  | Total Loss: 3.30 | Total Steps: 122
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5748/200000  | Episode Reward: 1.0  | Average Reward 4.08  | Actor loss: -0.46 | Critic loss: 5.36 | Entropy loss: -0.0224  | Total Loss: 4.88 | Total Steps: 174
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5749/200000  | Episode Reward: 7.0  | Average Reward 4.11  | Actor loss: -0.30 | Critic loss: 5.02 | Entropy loss: -0.0144  | Total Loss: 4.70 | Total Steps: 95
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5773/200000  | Episode Reward: 10.0  | Average Reward 4.36  | Actor loss: 0.34 | Critic loss: 2.89 | Entropy loss: -0.0014  | Total Loss: 3.23 | Total Steps: 17
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5774/200000  | Episode Reward: 10.0  | Average Reward 4.36  | Actor loss: -0.49 | Critic loss: 3.70 | Entropy loss: -0.0185  | Total Loss: 3.19 | Total Steps: 163
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5775/200000  | Episode Reward: 7.0  | Average Reward 4.42  | Actor loss: -0.66 | Critic loss: 4.22 | Entropy loss: -0.0120  | Total Loss: 3.55 | Total Steps: 133
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5798/200000  | Episode Reward: 7.0  | Average Reward 4.33  | Actor loss: -0.19 | Critic loss: 3.59 | Entropy loss: -0.0136  | Total Loss: 3.39 | Total Steps: 168
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5799/200000  | Episode Reward: 1.0  | Average Reward 4.34  | Actor loss: -0.51 | Critic loss: 5.61 | Entropy loss: -0.0163  | Total Loss: 5.09 | Total Steps: 165
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5800/200000  | Episode Reward: -5.0  | Average Reward 4.29  | Actor loss: 0.86 | Critic loss: 8.20 | Entropy loss: -0.0016  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5822/200000  | Episode Reward: 4.0  | Average Reward 4.05  | Actor loss: -0.10 | Critic loss: 5.25 | Entropy loss: -0.0209  | Total Loss: 5.13 | Total Steps: 113
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5823/200000  | Episode Reward: -8.0  | Average Reward 3.96  | Actor loss: -0.49 | Critic loss: 7.28 | Entropy loss: -0.0172  | Total Loss: 6.78 | Total Steps: 237
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5824/200000  | Episode Reward: 4.0  | Average Reward 3.98  | Actor loss: -0.27 | Critic loss: 5.27 | Entropy loss: -0.0131  | Total Loss: 4.98 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5847/200000  | Episode Reward: 10.0  | Average Reward 4.22  | Actor loss: 0.03 | Critic loss: 4.49 | Entropy loss: -0.0153  | Total Loss: 4.50 | Total Steps: 105
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5848/200000  | Episode Reward: 7.0  | Average Reward 4.24  | Actor loss: -0.10 | Critic loss: 2.89 | Entropy loss: -0.0122  | Total Loss: 2.77 | Total Steps: 114
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5849/200000  | Episode Reward: 10.0  | Average Reward 4.24  | Actor loss: 0.11 | Critic loss: 2.68 | Entropy loss: -0.0015  | Total Loss: 2.79 | Total Steps: 19
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5872/200000  | Episode Reward: 10.0  | Average Reward 4.14  | Actor loss: 0.16 | Critic loss: 2.23 | Entropy loss: -0.0015  | Total Loss: 2.39 | Total Steps: 18
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5873/200000  | Episode Reward: 10.0  | Average Reward 4.21  | Actor loss: 0.06 | Critic loss: 3.88 | Entropy loss: -0.0217  | Total Loss: 3.92 | Total Steps: 116
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5874/200000  | Episode Reward: 7.0  | Average Reward 4.25  | Actor loss: -0.07 | Critic loss: 3.49 | Entropy loss: -0.0182  | Total Loss: 3.41 | Total Steps: 165
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5897/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: -0.26 | Critic loss: 6.27 | Entropy loss: -0.0089  | Total Loss: 6.01 | Total Steps: 78
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5898/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.90 | Critic loss: 2.39 | Entropy loss: -0.0035  | Total Loss: 3.28 | Total Steps: 18
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5899/200000  | Episode Reward: 4.0  | Average Reward 4.39  | Actor loss: -0.35 | Critic loss: 5.08 | Entropy loss: -0.0210  | Total Loss: 4.70 | Total Steps: 124
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5900/200000  | Episode Re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5923/200000  | Episode Reward: -11.0  | Average Reward 4.45  | Actor loss: 1.33 | Critic loss: 7.36 | Entropy loss: -0.0036  | Total Loss: 8.69 | Total Steps: 269
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5924/200000  | Episode Reward: 7.0  | Average Reward 4.48  | Actor loss: -0.38 | Critic loss: 5.87 | Entropy loss: -0.0169  | Total Loss: 5.48 | Total Steps: 112
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5946/200000  | Episode Reward: 4.0  | Average Reward 3.98  | Actor loss: -0.91 | Critic loss: 7.01 | Entropy loss: -0.0172  | Total Loss: 6.08 | Total Steps: 105
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5947/200000  | Episode Reward: 10.0  | Average Reward 4.00  | Actor loss: 0.99 | Critic loss: 3.05 | Entropy loss: -0.0029  | Total Loss: 4.04 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5948/200000  | Episode Reward: 4.0  | Average Reward 4.02  | Actor loss: 0.04 | Critic loss: 4.76 | Entropy loss: -0.0110  | Total Loss: 4.79 | Total Steps: 348
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5970/200000  | Episode Reward: 4.0  | Average Reward 3.58  | Actor loss: -0.13 | Critic loss: 4.52 | Entropy loss: -0.0254  | Total Loss: 4.36 | Total Steps: 186
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5971/200000  | Episode Reward: 7.0  | Average Reward 3.62  | Actor loss: -0.46 | Critic loss: 3.93 | Entropy loss: -0.0166  | Total Loss: 3.45 | Total Steps: 140
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5972/200000  | Episode Reward: 7.0  | Average Reward 3.60  | Actor loss: -0.53 | Critic loss: 4.25 | Entropy loss: -0.0218  | Total Loss: 3.70 | Total Steps: 154
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5996/200000  | Episode Reward: 10.0  | Average Reward 3.83  | Actor loss: -0.06 | Critic loss: 3.83 | Entropy loss: -0.0154  | Total Loss: 3.75 | Total Steps: 105
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5997/200000  | Episode Reward: 1.0  | Average Reward 3.78  | Actor loss: -0.51 | Critic loss: 5.79 | Entropy loss: -0.0164  | Total Loss: 5.26 | Total Steps: 175
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 5998/200000  | Episode Reward: 4.0  | Average Reward 3.77  | Actor loss: 0.34 | Critic loss: 2.91 | Entropy loss: -0.0013  | Total Loss: 3.24 | Total Steps: 274
--- target colour: blue, target object: cube ---
Decision 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6019/200000  | Episode Reward: 1.0  | Average Reward 3.46  | Actor loss: -0.36 | Critic loss: 6.57 | Entropy loss: -0.0171  | Total Loss: 6.19 | Total Steps: 146
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6020/200000  | Episode Reward: 7.0  | Average Reward 3.48  | Actor loss: -0.19 | Critic loss: 6.70 | Entropy loss: -0.0113  | Total Loss: 6.50 | Total Steps: 95
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6021/200000  | Episode Reward: 10.0  | Average Reward 3.48  | Actor loss: 0.99 | Critic loss: 4.50 | Entropy loss: -0.0026  | Total Loss: 5.49 | Total Steps: 21
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6022/

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6044/200000  | Episode Reward: -5.0  | Average Reward 3.59  | Actor loss: -0.25 | Critic loss: 3.01 | Entropy loss: -0.0196  | Total Loss: 2.74 | Total Steps: 427
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6045/200000  | Episode Reward: 10.0  | Average Reward 3.59  | Actor loss: 0.14 | Critic loss: 3.07 | Entropy loss: -0.0004  | Total Loss: 3.21 | Total Steps: 17
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6046/200000  | Episode Reward: 4.0  | Average Reward 3.60  | Actor loss: -0.25 | Critic loss: 4.54 | Entropy loss: -0.0157  | Total Loss: 4.27 | Total Steps: 186
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6068/200000  | Episode Reward: 1.0  | Average Reward 3.65  | Actor loss: -0.76 | Critic loss: 5.73 | Entropy loss: -0.0243  | Total Loss: 4.95 | Total Steps: 142
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6069/200000  | Episode Reward: 3.0  | Average Reward 3.62  | Actor loss: -0.23 | Critic loss: 4.72 | Entropy loss: -0.0145  | Total Loss: 4.48 | Total Steps: 116
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6070/200000  | Episode Reward: 10.0  | Average Reward 3.65  | Actor loss: 0.10 | Critic loss: 4.67 | Entropy loss: -0.0107  | Total Loss: 4.76 | Total Steps: 102
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6093/200000  | Episode Reward: 10.0  | Average Reward 3.27  | Actor loss: 0.68 | Critic loss: 7.45 | Entropy loss: -0.0126  | Total Loss: 8.11 | Total Steps: 75
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6094/200000  | Episode Reward: 10.0  | Average Reward 3.29  | Actor loss: 0.45 | Critic loss: 3.87 | Entropy loss: -0.0012  | Total Loss: 4.32 | Total Steps: 17
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6095/200000  | Episode Reward: 10.0  | Average Reward 3.35  | Actor loss: -0.01 | Critic loss: 5.25 | Entropy loss: -0.0018  | Total Loss: 5.24 | Total Steps: 24
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6096/200000  | Episode Reward: 10.0  | Average Reward 3.43  | Actor loss: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6119/200000  | Episode Reward: -18.0  | Average Reward 3.18  | Actor loss: -0.40 | Critic loss: 5.10 | Entropy loss: -0.0129  | Total Loss: 4.69 | Total Steps: 396
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6120/200000  | Episode Reward: 10.0  | Average Reward 3.18  | Actor loss: 0.25 | Critic loss: 3.33 | Entropy loss: -0.0006  | Total Loss: 3.58 | Total Steps: 17
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6121/200000  | Episode Reward: 1.0  | Average Rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6143/200000  | Episode Reward: -2.0  | Average Reward 3.44  | Actor loss: -0.72 | Critic loss: 4.50 | Entropy loss: -0.0218  | Total Loss: 3.76 | Total Steps: 167
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6144/200000  | Episode Reward: 10.0  | Average Reward 3.46  | Actor loss: 0.66 | Critic loss: 3.85 | Entropy loss: -0.0017  | Total Loss: 4.50 | Total Steps: 18
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6145/200000  | Episode Reward: 1.0  | Average Reward 3.46  | Actor loss: -0.82 | Critic loss: 5.54 | Entropy loss: -0.0215  | Total Loss: 4.70 | Total Steps: 129
--- target colour: re

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6169/200000  | Episode Reward: 7.0  | Average Reward 3.90  | Actor loss: -0.12 | Critic loss: 4.13 | Entropy loss: -0.0197  | Total Loss: 3.99 | Total Steps: 157
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6170/200000  | Episode Reward: 4.0  | Average Reward 3.90  | Actor loss: -0.45 | Critic loss: 6.17 | Entropy loss: -0.0114  | Total Loss: 5.71 | Total Steps: 111
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6171/200000  | Episode Reward: -5.0  | Average Reward 3.84  | Actor loss: -0.51 | Critic loss: 7.09 | Entropy loss: -0.0278  | Total Loss: 6.55 | Total Step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6194/200000  | Episode Reward: 4.0  | Average Reward 3.75  | Actor loss: -0.48 | Critic loss: 5.01 | Entropy loss: -0.0300  | Total Loss: 4.51 | Total Steps: 184
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6195/200000  | Episode Reward: 1.0  | Average Reward 3.70  | Actor loss: -0.59 | Critic loss: 6.08 | Entropy loss: -0.0334  | Total Loss: 5.46 | Total Steps: 189
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6196/200000  | Episode Reward: 10.0  | Average Reward 3.70  | Actor loss: 0.82 | Critic loss: 3.31 | Entropy loss: -0.0025  | Total Loss: 4.12 | Total Steps: 18
--- target colour: blue, target object: sphere ---
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6220/200000  | Episode Reward: 7.0  | Average Reward 4.42  | Actor loss: -0.23 | Critic loss: 3.49 | Entropy loss: -0.0186  | Total Loss: 3.24 | Total Steps: 163
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6221/200000  | Episode Reward: 7.0  | Average Reward 4.40  | Actor loss: -0.53 | Critic loss: 5.06 | Entropy loss: -0.0163  | Total Loss: 4.52 | Total Steps: 113
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6222/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.22 | Critic loss: 3.00 | Entropy loss: -0.0011  | Total Loss: 3.21 | Total Steps: 17
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6223/200000  | Episode R

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6246/200000  | Episode Reward: 7.0  | Average Reward 4.58  | Actor loss: -0.23 | Critic loss: 3.58 | Entropy loss: -0.0255  | Total Loss: 3.32 | Total Steps: 179
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6247/200000  | Episode Reward: 10.0  | Average Reward 4.59  | Actor loss: 0.79 | Critic loss: 4.16 | Entropy loss: -0.0017  | Total Loss: 4.95 | Total Steps: 18
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6248/200000  | Episode Reward: -2.0  | Average Reward 4.53  | Actor loss: -0.47 | Critic loss: 4.64 | Entropy loss: -0.0307  | Total Loss: 4.14 | Total Steps: 221
--- target colour: black, target object: cube ---
Agent in terminal step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6272/200000  | Episode Reward: -14.0  | Average Reward 4.88  | Actor loss: -0.58 | Critic loss: 6.53 | Entropy loss: -0.0173  | Total Loss: 5.94 | Total Steps: 437
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6273/200000  | Episode Reward: -5.0  | Average Reward 4.83  | Actor loss: -0.53 | Critic loss: 8.03 | Entropy loss: -0.0111  | Total Loss: 7.49 | Total Steps: 152
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6296/200000  | Episode Reward: 4.0  | Average Reward 4.92  | Actor loss: -0.21 | Critic loss: 4.12 | Entropy loss: -0.0196  | Total Loss: 3.90 | Total Steps: 172
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6297/200000  | Episode Reward: 10.0  | Average Reward 4.96  | Actor loss: -0.22 | Critic loss: 4.43 | Entropy loss: -0.0138  | Total Loss: 4.20 | Total Steps: 105
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6298/200000  | Episode Reward: 1.0  | Average Reward 4.94  | Actor loss: -0.80 | Critic loss: 5.46 | Entropy loss: -0.0214  | Total Loss: 4.64 | Total Steps: 167
--- target colour: red, target object: sphere ---
Step: 250
Max Ste

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6321/200000  | Episode Reward: -2.0  | Average Reward 4.57  | Actor loss: -0.78 | Critic loss: 6.95 | Entropy loss: -0.0174  | Total Loss: 6.15 | Total Steps: 166
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6322/200000  | Episode Reward: 1.0  | Average Reward 4.55  | Actor loss: -0.54 | Critic loss: 7.53 | Entropy loss: -0.0147  | Total Loss: 6.98 | Total Steps: 99
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6323/200000  | Episode Reward: 1.0  | Average Reward 4.77  | Actor loss: -0.93 | Critic loss: 5.87 | Ent

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6346/200000  | Episode Reward: 4.0  | Average Reward 4.80  | Actor loss: -0.37 | Critic loss: 6.47 | Entropy loss: -0.0099  | Total Loss: 6.08 | Total Steps: 102
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6347/200000  | Episode Reward: 7.0  | Average Reward 4.79  | Actor loss: -0.18 | Critic loss: 3.89 | Entropy loss: -0.0181  | Total Loss: 3.69 | Total Steps: 154
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6348/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 1.14 | Critic loss: 2.68 | Entropy loss: -0.0049  | Total Loss: 3.81 | Total Steps: 18
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agen

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6370/200000  | Episode Reward: 10.0  | Average Reward 4.37  | Actor loss: -0.23 | Critic loss: 5.11 | Entropy loss: -0.0133  | Total Loss: 4.87 | Total Steps: 102
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6371/200000  | Episode Reward: 10.0  | Average Reward 4.45  | Actor loss: 0.75 | Critic loss: 3.12 | Entropy loss: -0.0019  | Total Loss: 3.87 | Total Steps: 18
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6372/200000  | Episode Reward: 1.0  | Average Reward 4.40  | Actor loss: -0.32 | Critic loss: 6.20 | Entropy loss: -0.0110  | Total Loss: 5.87 | Total Steps: 139
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6396/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.30 | Critic loss: 4.34 | Entropy loss: -0.0015  | Total Loss: 4.63 | Total Steps: 18
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6397/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.39 | Critic loss: 3.78 | Entropy loss: -0.0016  | Total Loss: 4.16 | Total Steps: 18
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6398/200000  | Episode Reward: 7.0  | Average Reward 4.41  | Actor loss: -0.15 | Critic loss: 7.56 | Entropy loss: -0.0090  | Total Loss: 7.40 | Total Steps: 77
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6399/200000  | Episode Reward: 10.0  | Average Re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6421/200000  | Episode Reward: 10.0  | Average Reward 4.18  | Actor loss: -0.08 | Critic loss: 3.44 | Entropy loss: -0.0167  | Total Loss: 3.35 | Total Steps: 106
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6422/200000  | Episode Reward: 2.0  | Average Reward 4.14  | Actor loss: -0.35 | Critic loss: 4.28 | Entropy loss: -0.0275  | Total Loss: 3.90 | Total Steps: 195
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6423/200000  | Episode Reward: 4.0  | Average Reward 4.12  | Actor loss: 0.80 | Critic loss: 8.37 | Entropy loss: -0.0012  | Total Loss: 9.17 | Total Steps: 260
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal S

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6446/200000  | Episode Reward: 7.0  | Average Reward 3.88  | Actor loss: -0.02 | Critic loss: 3.42 | Entropy loss: -0.0037  | Total Loss: 3.39 | Total Steps: 109
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6447/200000  | Episode Reward: 10.0  | Average Reward 3.88  | Actor loss: 0.39 | Critic loss: 4.84 | Entropy loss: -0.0015  | Total Loss: 5.23 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6448/200000  | Episode Reward: 7.0  | Average Reward 3.92  | Actor loss: -0.39 | Critic loss: 3.71 | Entropy loss: -0.0278  | Total Loss: 3.30 | Total Steps: 173
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6472/200000  | Episode Reward: 1.0  | Average Reward 3.90  | Actor loss: -0.59 | Critic loss: 6.45 | Entropy loss: -0.0187  | Total Loss: 5.84 | Total Steps: 139
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6473/200000  | Episode Reward: 4.0  | Average Reward 3.94  | Actor loss: -0.27 | Critic loss: 4.47 | Entropy loss: -0.0118  | Total Loss: 4.19 | Total Steps: 142
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6474/200000  | Episode Reward: 1.0  | Average Reward 3.93  | Actor loss: -0.41 | Critic loss: 6.56 | Entropy loss: -0.0136  | Total Loss: 6.14 | Total Steps: 156


Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6499/200000  | Episode Reward: 4.0  | Average Reward 4.41  | Actor loss: -0.14 | Critic loss: 6.16 | Entropy loss: -0.0171  | Total Loss: 6.00 | Total Steps: 109
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6500/200000  | Episode Reward: 10.0  | Average Reward 4.41  | Actor loss: 0.27 | Critic loss: 2.02 | Entropy loss: -0.0013  | Total Loss: 2.29 | Total Steps: 16
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6501/200000  | Episode Reward: 4.0  | Average Reward 4.38  | Actor loss: -0.77 | Critic loss: 5.93 | Entropy loss: -0.0155  | Total Loss: 5.14 | Total Steps: 113
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6524/200000  | Episode Reward: -5.0  | Average Reward 4.55  | Actor loss: -0.56 | Critic loss: 6.40 | Entropy loss: -0.0168  | Total Loss: 5.82 | Total Steps: 220
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6525/200000  | Episode Reward: 4.0  | Average Reward 4.53  | Actor loss: -0.30 | Critic loss: 5.33 | Entropy loss: -0.0110  | Total Loss: 5.02 | Total Steps: 115
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6526/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: 0.13 | Critic loss: 6.59 | Entropy loss: -0.0127  | Tot

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6550/200000  | Episode Reward: 7.0  | Average Reward 4.67  | Actor loss: -0.17 | Critic loss: 5.83 | Entropy loss: -0.0120  | Total Loss: 5.65 | Total Steps: 100
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6551/200000  | Episode Reward: 10.0  | Average Reward 4.67  | Actor loss: 1.07 | Critic loss: 3.27 | Entropy loss: -0.0031  | Total Loss: 4.33 | Total Steps: 19
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6552/200000  | Episode Reward: 10.0  | Average Reward 4.70  | Actor loss: 0.24 | Critic loss: 3.43 | Entropy loss: -0.0016  | Total Loss: 3.67 | Total Steps: 19
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6575/200000  | Episode Reward: 10.0  | Average Reward 4.91  | Actor loss: 0.05 | Critic loss: 2.61 | Entropy loss: -0.0110  | Total Loss: 2.65 | Total Steps: 153
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6576/200000  | Episode Reward: 4.0  | Average Reward 4.88  | Actor loss: -0.18 | Critic loss: 5.24 | Entropy loss: -0.0088  | Total Loss: 5.05 | Total Steps: 102
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6577/200000  | Episode Reward: 10.0  | Average Reward 4.92  | Actor loss: 1.49 | Critic loss: 4.81 | Entropy loss: -0.0034  | Total Loss: 6.30 | Total Steps: 20
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6578/200000  | Epi

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6600/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.49 | Critic loss: 5.28 | Entropy loss: -0.0268  | Total Loss: 4.76 | Total Steps: 183
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6601/200000  | Episode Reward: 10.0  | Average Reward 4.54  | Actor loss: 0.37 | Critic loss: 3.20 | Entropy loss: -0.0015  | Total Loss: 3.57 | Total Steps: 17
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6602/200000  | Episode Reward: 4.0  | Average Reward 4.51  | Actor loss: -0.21 | Critic loss: 6.69 | Entropy loss: -0.0107  | Total Loss: 6.46 | Total Steps: 101
--- target colour: black, target object: pr

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6625/200000  | Episode Reward: 7.0  | Average Reward 4.42  | Actor loss: 0.01 | Critic loss: 6.84 | Entropy loss: -0.0104  | Total Loss: 6.84 | Total Steps: 91
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6626/200000  | Episode Reward: -2.0  | Average Reward 4.36  | Actor loss: -0.41 | Critic loss: 4.92 | Entropy loss: -0.0213  | Total Loss: 4.49 | Total Steps: 229
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6627/200000  | Episode Reward: -2.0  | Average Reward 4.33  | Actor loss: -0.30 | Critic loss: 6.26 | Entropy loss: -0.0198  | Total 

Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6648/200000  | Episode Reward: -8.0  | Average Reward 3.98  | Actor loss: 1.03 | Critic loss: 7.49 | Entropy loss: -0.0017  | Total Loss: 8.52 | Total Steps: 269
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6649/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.73 | Critic loss: 4.73 | Entropy loss: -0.0186  | Total Loss: 3.98 | Total Steps: 121
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6650/200000  | Episode Reward: 7.0  | Average Reward 3.95  | Actor loss: -0.21 | Critic loss: 4.61 | Entropy loss: -0.0099  | Total Loss: 4.39 | Total Steps: 127
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6673/200000  | Episode Reward: 4.0  | Average Reward 3.92  | Actor loss: -0.20 | Critic loss: 4.41 | Entropy loss: -0.0147  | Total Loss: 4.19 | Total Steps: 146
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6674/200000  | Episode Reward: 4.0  | Average Reward 3.94  | Actor loss: -0.18 | Critic loss: 4.53 | Entropy loss: -0.0160  | Total Loss: 4.33 | Total Steps: 119
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6675/200000  | Episode Reward: 1.0  | Average Reward 3.90  | Actor loss: -0.58 | Critic loss: 5.67 | Entropy loss: -0.0233  | Total Loss: 5.06 | Total Steps: 179
--- target colour: black, target object: prism ---
Agent in terminal steps

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6699/200000  | Episode Reward: 1.0  | Average Reward 3.68  | Actor loss: -0.27 | Critic loss: 7.27 | Entropy loss: -0.0253  | Total Loss: 6.97 | Total Steps: 148
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6700/200000  | Episode Reward: -11.0  | Average Reward 3.58  | Actor loss: -1.04 | Critic loss: 8.22 | Entropy loss: -0.0259  | Total Loss: 7.15 | Total Steps: 237
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6701/200000  | Episode Reward: 1

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6721/200000  | Episode Reward: -8.0  | Average Reward 2.97  | Actor loss: 0.02 | Critic loss: 5.13 | Entropy loss: -0.0027  | Total Loss: 5.14 | Total Steps: 335
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6722/200000  | Episode Reward: 10.0  | Average Reward 3.04  | Actor loss: 0.84 | Critic loss: 3.15 | Entropy loss: -0.0026  | Total Loss: 3.98 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6723/200000  | Episode Reward: 4.0  | Average Reward 3.04  | Actor loss: -0.04 | Critic loss: 5.30 | Entropy loss: -0.0150  | Total Loss: 5.25 | Total Steps: 117
--- target colour: black, target 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6746/200000  | Episode Reward: 1.0  | Average Reward 2.86  | Actor loss: -0.51 | Critic loss: 4.92 | Entropy loss: -0.0225  | Total Loss: 4.39 | Total Steps: 161
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6747/200000  | Episode Reward: -11.0  | Average Reward 2.85  | Actor loss: -0.64 | Critic loss: 6.40 | Entropy loss: -0.0137  | Total Loss: 5.74 | Total Steps: 415
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6748/200000  | Episode Reward: 7.0  | Average Reward 2.84  | Actor loss: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6769/200000  | Episode Reward: 4.0  | Average Reward 2.27  | Actor loss: 0.12 | Critic loss: 4.23 | Entropy loss: -0.0213  | Total Loss: 4.33 | Total Steps: 142
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6770/200000  | Episode Reward: -5.0  | Average Reward 2.23  | Actor loss: -0.14 | Critic loss: 4.53 | Entropy loss: -0.0363  | Total Loss: 4.35 | Total Steps: 247
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6771/200000  | Episode Reward: 4.0  | Average Reward 2.20  | Actor loss: -0.42 | Critic loss: 3.49 | Entropy loss: -0.0246  | Total Lo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6794/200000  | Episode Reward: 0.0  | Average Reward 2.69  | Actor loss: -0.46 | Critic loss: 5.42 | Entropy loss: -0.0180  | Total Loss: 4.94 | Total Steps: 154
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6795/200000  | Episode Reward: 10.0  | Average Reward 2.71  | Actor loss: 0.33 | Critic loss: 3.91 | Entropy loss: -0.0013  | Total Loss: 4.23 | Total Steps: 17
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6796/200000  | Episode Reward: 10.0  | Average Reward 2.76  | Actor loss: 0.90 | Critic loss: 2.96 | Entropy loss: -0.0035  | Total Loss: 3.86 | Total Steps: 18
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6819/200000  | Episode Reward: 10.0  | Average Reward 2.73  | Actor loss: 0.32 | Critic loss: 2.73 | Entropy loss: -0.0016  | Total Loss: 3.04 | Total Steps: 17
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6820/200000  | Episode Reward: 1.0  | Average Reward 2.69  | Actor loss: -0.48 | Critic loss: 5.77 | Entropy loss: -0.0181  | Total Loss: 5.27 | Total Steps: 163
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6821/200000  | Episode Reward: 10.0  | Average Reward 2.71  | Actor loss: -0.16 | Critic loss: 4.22 | Entropy loss: -0.0106  | Total Loss: 4.05 | Total Steps: 105
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6845/200000  | Episode Reward: 7.0  | Average Reward 3.44  | Actor loss: -0.23 | Critic loss: 4.01 | Entropy loss: -0.0219  | Total Loss: 3.76 | Total Steps: 142
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6846/200000  | Episode Reward: 7.0  | Average Reward 3.43  | Actor loss: -0.68 | Critic loss: 6.33 | Entropy loss: -0.0093  | Total Loss: 5.64 | Total Steps: 97
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6847/200000  | Episode Reward: 0.0  | Average Reward 3.38  | Actor loss: -0.56 | Critic loss: 6.18 | Entropy loss: -0.0216  | Total Loss: 5.60 | Total Steps: 154
--- target colour: blue, target object: capsule ---
Decision Step reward:

Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 6870/200000  | Episode Reward: -26.0  | Average Reward 3.35  | Actor loss: -0.50 | Critic loss: 6.89 | Entropy loss: -0.0216  | Total Loss: 6.37 | Total Steps: 500
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6871/200000  | Episode Reward: 4.0  | Average Reward 3.31  | Actor loss: -0.25 | Critic loss: 4.93 | Entropy loss: -0.0136  | Total Loss: 4.67 | Total Steps: 116
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6872/200000  | Episode Reward: 10.0  | Average Reward 3.36  | Actor loss: 0.45 | Critic loss: 5.16 | Entropy loss: -0.0012  | Total Loss: 5.60 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6897/200000  | Episode Reward: 7.0  | Average Reward 3.58  | Actor loss: -0.06 | Critic loss: 8.44 | Entropy loss: -0.0060  | Total Loss: 8.38 | Total Steps: 78
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6898/200000  | Episode Reward: -2.0  | Average Reward 3.52  | Actor loss: -0.42 | Critic loss: 8.30 | Entropy loss: -0.0095  | Total Loss: 7.87 | Total Steps: 172
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6899/200000  | Episode Reward: 7.0  | Average Reward 3.55  | Actor loss: -0.53 | Critic loss: 3.39 | Entropy loss: -0.0145  | Total Loss: 2.85 | Total Steps: 123
--- target colour: blue, target object: capsule 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6922/200000  | Episode Reward: -2.0  | Average Reward 4.29  | Actor loss: -0.31 | Critic loss: 7.11 | Entropy loss: -0.0209  | Total Loss: 6.78 | Total Steps: 148
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6923/200000  | Episode Reward: 3.0  | Average Reward 4.29  | Actor loss: -0.51 | Critic loss: 4.54 | Entropy loss: -0.0182  | Total Loss: 4.02 | Total Steps: 166
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6924/200000  | Episode Reward: 1.0  | Average Reward 4.27  | Actor loss: -0.32 | Critic loss: 6.17 | Entropy loss: -0.0142  | T

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6946/200000  | Episode Reward: 4.0  | Average Reward 4.26  | Actor loss: -0.10 | Critic loss: 5.45 | Entropy loss: -0.0144  | Total Loss: 5.34 | Total Steps: 108
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6947/200000  | Episode Reward: 7.0  | Average Reward 4.35  | Actor loss: 1.91 | Critic loss: 6.49 | Entropy loss: -0.0033  | Total Loss: 8.39 | Total Steps: 266
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6948/200000  | Episode Reward: 7.0  | Average Reward 4.35  | Actor loss: -0.45 | Critic loss: 3.00 | Entropy loss: -0.0184  | Total Loss: 2.54 | Total Steps: 141
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal S

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6971/200000  | Episode Reward: 4.0  | Average Reward 4.79  | Actor loss: -0.27 | Critic loss: 4.28 | Entropy loss: -0.0207  | Total Loss: 3.99 | Total Steps: 116
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6972/200000  | Episode Reward: 1.0  | Average Reward 4.83  | Actor loss: -0.36 | Critic loss: 4.91 | Entropy loss: -0.0173  | Total Loss: 4.53 | Total Steps: 150
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6973/200000  | Episode Reward: 1.0  | Average Reward 4.82  | Actor loss: -0.72 | Critic loss: 4.42 | Entropy loss: -0.0317  | Total Loss: 3.68 | Total Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6997/200000  | Episode Reward: -6.0  | Average Reward 4.71  | Actor loss: -0.77 | Critic loss: 6.65 | Entropy loss: -0.0314  | Total Loss: 5.84 | Total Steps: 169
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6998/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: 0.19 | Critic loss: 3.16 | Entropy loss: -0.0018  | Total Loss: 3.35 | Total Steps: 19
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 6999/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.31 | Critic loss: 4.51 | Entropy loss: -0.0126  | Total Loss: 4.19 | Total Steps: 140
--- target colour: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7021/200000  | Episode Reward: 4.0  | Average Reward 4.69  | Actor loss: -0.76 | Critic loss: 5.39 | Entropy loss: -0.0145  | Total Loss: 4.62 | Total Steps: 102
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7022/200000  | Episode Reward: 1.0  | Average Reward 4.64  | Actor loss: -0.11 | Critic loss: 1.89 | Entropy loss: -0.0161  | Total Loss: 1.76 | Total Steps: 412
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7023/200000  | Episode Reward: 3.0  | Average Reward 4.61  | Actor loss: -0.34 | Critic loss: 4.20 | Entropy loss: -0.0276  | Total Loss: 3.84 | Total Steps: 175
--- target colour: yellow, target object: capsule ---
Decisi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7046/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: 0.05 | Critic loss: 4.03 | Entropy loss: -0.0130  | Total Loss: 4.07 | Total Steps: 103
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7047/200000  | Episode Reward: 1.0  | Average Reward 4.63  | Actor loss: -0.56 | Critic loss: 5.16 | Entropy loss: -0.0211  | Total Loss: 4.57 | Total Steps: 175
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7048/200000  | Episode Reward: 0.0  | Average Reward 4.62  | Actor loss: -0.67 | Critic loss: 5.21 | Entropy loss: -0.0217  | Total Loss: 4.52 | Total Steps: 150
--- target colour: black, target object: cyl

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7072/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.58 | Critic loss: 4.45 | Entropy loss: -0.0245  | Total Loss: 3.84 | Total Steps: 145
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7073/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.16 | Critic loss: 4.79 | Entropy loss: -0.0132  | Total Loss: 4.62 | Total Steps: 162
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7074/200000  | Episode Reward: 6.0  | Average Reward 4.72  | Actor loss: -0.41 | Critic loss: 3.53 | Entropy loss: -0.0207  | Total Loss: 3.10 | Total Steps: 145
--- target colour: yellow, target object: cylinder ---
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7098/200000  | Episode Reward: 10.0  | Average Reward 4.60  | Actor loss: 0.83 | Critic loss: 4.85 | Entropy loss: -0.0018  | Total Loss: 5.68 | Total Steps: 21
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7099/200000  | Episode Reward: 4.0  | Average Reward 4.58  | Actor loss: -0.32 | Critic loss: 5.05 | Entropy loss: -0.0169  | Total Loss: 4.71 | Total Steps: 120
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7100/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: -0.13 | Critic loss: 3.68 | Entropy loss: -0.0111  | Total Loss: 3.54 | Total Steps: 101
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step r

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7121/200000  | Episode Reward: 1.0  | Average Reward 4.09  | Actor loss: -0.50 | Critic loss: 4.14 | Entropy loss: -0.0226  | Total Loss: 3.62 | Total Steps: 186
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7122/200000  | Episode Reward: 4.0  | Average Reward 4.12  | Actor loss: -0.16 | Critic loss: 3.87 | Entropy loss: -0.0307  | Total Loss: 3.68 | Total Steps: 175
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7123/200000  | Episode Reward: 10.0  | Average Reward 4.16  | Actor loss: 1.16 | Critic loss: 7.06 | Entropy loss: -0.0037  | Total Loss: 8.21 | Total Steps: 19
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7146/200000  | Episode Reward: -2.0  | Average Reward 4.25  | Actor loss: -0.64 | Critic loss: 5.56 | Entropy loss: -0.0331  | Total Loss: 4.88 | Total Steps: 239
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7147/200000  | Episode Reward: 1.0  | Average Reward 4.22  | Actor loss: -0.38 | Critic loss: 6.09 | Entropy loss: -0.0160  | Total Loss: 5.69 | Total Steps: 140
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7148/200000  | Episode Reward: 10.0  | Average Reward 4.24  | Actor loss: 1.21 | Critic loss: 5.77 | Entropy loss: -0.0040  | Total Loss: 6.98 | Total Steps: 18
--- target colour: yellow

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7171/200000  | Episode Reward: 10.0  | Average Reward 4.31  | Actor loss: 0.38 | Critic loss: 3.20 | Entropy loss: -0.0012  | Total Loss: 3.59 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7172/200000  | Episode Reward: 4.0  | Average Reward 4.33  | Actor loss: 0.01 | Critic loss: 21.53 | Entropy loss: -0.0001  | Total Loss: 21.54 | Total Steps: 251
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7173/200000  | Episode Reward: 10.0  | Average Reward 4.37  | Actor loss: 0.40 | Critic loss: 3.91 | Entropy loss: -0.0015  | Total Loss: 4.31 | Total Steps: 16
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Ter

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7196/200000  | Episode Reward: 7.0  | Average Reward 4.16  | Actor loss: -0.78 | Critic loss: 3.68 | Entropy loss: -0.0300  | Total Loss: 2.88 | Total Steps: 190
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7197/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.39 | Critic loss: 4.85 | Entropy loss: -0.0218  | Total Loss: 4.44 | Total Steps: 169
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7198/200000  | Episode Reward: 10.0  | Average Reward 4.21  | Actor loss: 0.58 | Critic loss: 3.56 | Entropy loss: -0.0019  | Total Loss: 4.15 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7222/200000  | Episode Reward: 6.0  | Average Reward 4.51  | Actor loss: -0.62 | Critic loss: 3.19 | Entropy loss: -0.0302  | Total Loss: 2.54 | Total Steps: 154
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7223/200000  | Episode Reward: -5.0  | Average Reward 4.47  | Actor loss: -0.00 | Critic loss: 4.14 | Entropy loss: -0.0034  | Total Loss: 4.13 | Total Steps: 307
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7224/200000  | Episode Reward: 4.0  | Average Reward 4.49  | Actor loss: -0.16 | Critic loss: 5.56 | Entropy loss: -0.0122  | Total Loss: 5.38 | Total

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7248/200000  | Episode Reward: 7.0  | Average Reward 4.58  | Actor loss: -0.41 | Critic loss: 2.97 | Entropy loss: -0.0198  | Total Loss: 2.53 | Total Steps: 137
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7249/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: -0.03 | Critic loss: 2.49 | Entropy loss: -0.0213  | Total Loss: 2.45 | Total Steps: 127
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7250/200000  | Episode Reward: -2.0  | Average Reward 4.58  | Actor loss: -0.09 | Critic loss: 5.96 | Entropy loss: -0.0234  | Total Loss: 5.85 | Total Steps: 216
--- target colour: yellow, target object: cylinder ---
Decision Ste

Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 7273/200000  | Episode Reward: -28.0  | Average Reward 4.42  | Actor loss: -0.71 | Critic loss: 5.65 | Entropy loss: -0.0265  | Total Loss: 4.92 | Total Steps: 500
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7274/200000  | Episode Reward: 7.0  | Average Reward 4.42  | Actor loss: -0.48 | Critic loss: 6.55 | Entropy loss: -0.0044  | Total Loss: 6.07 | Total Steps: 89
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7275/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 1.16 | Critic loss: 6.50 | Entropy loss: -0.0028  | Total Loss: 7.65 | Total Steps: 18
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7298/200000  | Episode Reward: 10.0  | Average Reward 4.22  | Actor loss: -0.15 | Critic loss: 2.28 | Entropy loss: -0.0159  | Total Loss: 2.11 | Total Steps: 137
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7299/200000  | Episode Reward: 4.0  | Average Reward 4.22  | Actor loss: -0.18 | Critic loss: 6.70 | Entropy loss: -0.0178  | Total Loss: 6.49 | Total Steps: 123
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7300/200000  | Episode Reward: 10.0  | Average Reward 4.22  | Actor loss: 0.38 | Critic loss: 4.47 | Entropy loss: -0.0013  | Total Loss: 4.84 | Total Steps: 17
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7301/200000  | Episode Re

Max Step Reward: -10
Step: 500
Training  | Episode: 7322/200000  | Episode Reward: -28.0  | Average Reward 4.45  | Actor loss: -0.77 | Critic loss: 6.62 | Entropy loss: -0.0306  | Total Loss: 5.82 | Total Steps: 500
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7323/200000  | Episode Reward: 1.0  | Average Reward 4.41  | Actor loss: -0.14 | Critic loss: 2.62 | Entropy loss: -0.0174  | Total Loss: 2.46 | Total Steps: 451
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7324/200000  | Episode Reward: -9.0  | Average Reward 4.37  | Actor loss: 0.02 | Critic loss: 2.65 | Entropy loss: -0.0201  | Total L

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7347/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.27 | Critic loss: 3.21 | Entropy loss: -0.0018  | Total Loss: 3.47 | Total Steps: 18
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7348/200000  | Episode Reward: 5.0  | Average Reward 4.36  | Actor loss: -0.59 | Critic loss: 3.51 | Entropy loss: -0.0318  | Total Loss: 2.89 | Total Steps: 184
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7349/200000  | Episode Reward: 4.0  | Average Reward 4.38  | Actor loss: -0.32 | Critic loss: 4.30 | Entropy loss: -0.0196  | Total Loss: 3.97 | Total Steps: 151
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7371/200000  | Episode Reward: -3.0  | Average Reward 3.88  | Actor loss: -0.80 | Critic loss: 5.60 | Entropy loss: -0.0245  | Total Loss: 4.77 | Total Steps: 168
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7372/200000  | Episode Reward: 0.0  | Average Reward 3.87  | Actor loss: -0.34 | Critic loss: 5.86 | Entropy loss: -0.0190  | Total Loss: 5.50 | Total Steps: 148
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7373/200000  | Episode Reward: 10.0  | Average Reward 3.87  | Actor loss: 1.73 | Critic loss: 6.15 | Entropy loss: -0.0041  | Total Loss: 7.87 | Total Steps: 20
--- target colour: gree

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7396/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.35 | Critic loss: 7.68 | Entropy loss: -0.0100  | Total Loss: 7.32 | Total Steps: 109
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7397/200000  | Episode Reward: 1.0  | Average Reward 3.95  | Actor loss: -0.22 | Critic loss: 4.68 | Entropy loss: -0.0125  | Total Loss: 4.44 | Total Steps: 181
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7398/200000  | Episode Reward: 7.0  | Average Reward 3.94  | Actor loss: -0.41 | Critic loss: 3.93 | Entropy loss: -0.0211  | Total Loss: 3.51 | Total Steps: 172
--- target colour: yellow, target object: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7421/200000  | Episode Reward: 4.0  | Average Reward 3.83  | Actor loss: -0.10 | Critic loss: 8.96 | Entropy loss: -0.0075  | Total Loss: 8.85 | Total Steps: 83
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7422/200000  | Episode Reward: -1.0  | Average Reward 3.80  | Actor loss: -0.64 | Critic loss: 5.59 | Entropy loss: -0.0264  | Total Loss: 4.92 | Total Steps: 185
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7423/200000  | Episode Reward: 10.0  | Average Reward 3.88  | Actor loss: 0.38 | Critic loss: 3.78 | Entropy loss: -0.0020  | Total Loss: 4.16 | Total Steps: 18
--- target colour: red, target object: cylinder ---
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7447/200000  | Episode Reward: 7.0  | Average Reward 3.85  | Actor loss: -0.07 | Critic loss: 3.06 | Entropy loss: -0.0125  | Total Loss: 2.98 | Total Steps: 148
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7448/200000  | Episode Reward: 3.0  | Average Reward 3.83  | Actor loss: -0.33 | Critic loss: 5.67 | Entropy loss: -0.0150  | Total Loss: 5.33 | Total Steps: 117
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7449/200000  | Episode Reward: 1.0  | Average Reward 3.78  | Actor loss: -0.29 | Critic loss: 5.46 | Entropy loss: -0.0092  | Total Loss: 5.17 | Total Steps: 141
--- target colour: black, target object: cube

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7471/200000  | Episode Reward: 10.0  | Average Reward 3.50  | Actor loss: 0.11 | Critic loss: 4.00 | Entropy loss: -0.0103  | Total Loss: 4.11 | Total Steps: 95
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7472/200000  | Episode Reward: 1.0  | Average Reward 3.46  | Actor loss: -0.19 | Critic loss: 4.79 | Entropy loss: -0.0196  | Total Loss: 4.58 | Total Steps: 185
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7473/200000  | Episode Reward: 1.0  | Average Reward 3.60  | Actor loss: -0.33 | Critic loss: 5.19 | Entropy loss: -0.0242  | Total Loss: 4.83 | Total Steps: 160
--- target colour: blue, target object: sphere 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7497/200000  | Episode Reward: -2.0  | Average Reward 3.77  | Actor loss: -0.97 | Critic loss: 5.22 | Entropy loss: -0.0264  | Total Loss: 4.22 | Total Steps: 151
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7498/200000  | Episode Reward: 1.0  | Average Reward 3.73  | Actor loss: -0.46 | Critic loss: 5.06 | Entropy loss: -0.0161  | Total Loss: 4.59 | Total Steps: 171
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7499/200000  | Episode Reward: 9.0  | Average Reward 3.75  | Actor loss: -0.27 | Critic loss: 3.90 | Entropy loss: -0.0089  | Total Loss: 3.63 | Total Steps: 106
--- target colou

--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7523/200000  | Episode Reward: 7.0  | Average Reward 4.09  | Actor loss: -0.18 | Critic loss: 3.13 | Entropy loss: -0.0172  | Total Loss: 2.93 | Total Steps: 158
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7524/200000  | Episode Reward: 10.0  | Average Reward 4.19  | Actor loss: -0.13 | Critic loss: 2.28 | Entropy loss: -0.0135  | Total Loss: 2.13 | Total Steps: 141
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7525/200000  | Episode Reward: -2.0  | Average Reward 4.13  | Actor loss: -0.53 | Critic loss: 6.06 | Entropy loss: -0.0157  | Total Loss: 5.51 | Total Steps: 153
--- target colour:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7547/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: 0.01 | Critic loss: 4.14 | Entropy loss: -0.0167  | Total Loss: 4.13 | Total Steps: 119
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7548/200000  | Episode Reward: 7.0  | Average Reward 4.09  | Actor loss: 0.02 | Critic loss: 11.14 | Entropy loss: -0.0001  | Total Loss: 11.15 | Total Steps: 258
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7549/200000  | Episode Reward: 1.0  | Average Reward 4.08  | Actor loss: -0.42 | Critic loss: 4.61 | Entropy loss: -0.0113  | Total Loss: 4.18 | Total Steps: 167
--- target colour: blue, target object: prism ---
Agent i

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7573/200000  | Episode Reward: 4.0  | Average Reward 4.64  | Actor loss: -0.24 | Critic loss: 5.63 | Entropy loss: -0.0091  | Total Loss: 5.38 | Total Steps: 129
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7574/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.53 | Critic loss: 4.41 | Entropy loss: -0.0183  | Total Loss: 3.87 | Total Steps: 118
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7575/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: 0.43 | Critic loss: 3.51 | Entropy loss: -0.0015  | Total Loss: 3.95 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7597/200000  | Episode Reward: 4.0  | Average Reward 4.51  | Actor loss: -0.67 | Critic loss: 6.01 | Entropy loss: -0.0136  | Total Loss: 5.33 | Total Steps: 106
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7598/200000  | Episode Reward: 1.0  | Average Reward 4.48  | Actor loss: -0.38 | Critic loss: 5.42 | Entropy loss: -0.0139  | Total Loss: 5.02 | Total Steps: 162
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7599/200000  | Episode Reward: 1.0  | Average Reward 4.43  | Actor loss: -0.27 | Critic loss: 4.94 | Entropy loss: -0.0155  | Total Loss: 4.65 | Total Steps: 159
--- target colour: black, target object: caps

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7622/200000  | Episode Reward: 7.0  | Average Reward 4.30  | Actor loss: -0.35 | Critic loss: 6.27 | Entropy loss: -0.0101  | Total Loss: 5.92 | Total Steps: 95
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7623/200000  | Episode Reward: -8.0  | Average Reward 4.21  | Actor loss: 0.12 | Critic loss: 5.53 | Entropy loss: -0.0016  | Total Loss: 5.65 | Total Steps: 287
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7624/200000  | Episode Reward: 4.0  | Average Reward 4.20  | Actor loss: -0.23 | Critic loss: 5.65 | Entropy loss: -0.0128  

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7648/200000  | Episode Reward: 10.0  | Average Reward 4.36  | Actor loss: -0.16 | Critic loss: 4.20 | Entropy loss: -0.0215  | Total Loss: 4.01 | Total Steps: 110
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7649/200000  | Episode Reward: -8.0  | Average Reward 4.32  | Actor loss: 0.03 | Critic loss: 11.87 | Entropy loss: -0.0001  | Total Loss: 11.91 | Total Steps: 251
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7650/200000  | Episode Reward: 1.0  | Average Reward 4.30  | Actor loss: -0.45 | Critic loss: 5.93 | Entropy loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7671/200000  | Episode Reward: 1.0  | Average Reward 4.16  | Actor loss: -0.15 | Critic loss: 5.66 | Entropy loss: -0.0200  | Total Loss: 5.50 | Total Steps: 172
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7672/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.66 | Critic loss: 4.10 | Entropy loss: -0.0015  | Total Loss: 4.76 | Total Steps: 17
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7673/200000  | Episode Reward: 10.0  | Average Reward 4.25  | Actor loss: -0.05 | Critic loss: 3.15 | Entropy loss: -0.0176  | Total Loss: 3.09 | Total Steps: 117
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7697/200000  | Episode Reward: -2.0  | Average Reward 4.29  | Actor loss: -0.32 | Critic loss: 5.60 | Entropy loss: -0.0249  | Total Loss: 5.26 | Total Steps: 198
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7698/200000  | Episode Reward: 10.0  | Average Reward 4.33  | Actor loss: 0.10 | Critic loss: 3.22 | Entropy loss: -0.0004  | Total Loss: 3.32 | Total Steps: 17
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7699/200000  | Episode Reward: 1.0  | Average Reward 4.29  | Actor loss: -0.40 | Critic loss: 4.69 | Entropy loss: -0.0263  | Total Loss: 4.27 | Total Steps: 161
--- target colour: r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7722/200000  | Episode Reward: 10.0  | Average Reward 4.29  | Actor loss: 0.55 | Critic loss: 3.50 | Entropy loss: -0.0017  | Total Loss: 4.04 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7723/200000  | Episode Reward: 3.0  | Average Reward 4.28  | Actor loss: -0.31 | Critic loss: 5.64 | Entropy loss: -0.0357  | Total Loss: 5.30 | Total Steps: 214
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7724/200000  | Episode Reward: -11.0  | Average Reward 4.17  | Actor loss: 0.28 | Critic loss: 4.37 | Entropy loss: -0.0

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7747/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.19 | Critic loss: 2.93 | Entropy loss: -0.0019  | Total Loss: 3.11 | Total Steps: 22
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7748/200000  | Episode Reward: 1.0  | Average Reward 4.39  | Actor loss: -0.36 | Critic loss: 6.05 | Entropy loss: -0.0218  | Total Loss: 5.68 | Total Steps: 153
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7749/200000  | Episode Reward: 7.0  | Average Reward 4.42  | Actor loss: -0.30 | Critic loss: 3.30 | Entropy loss: -0.0137  | Total Loss: 2.98 | Total Steps: 145
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7773/200000  | Episode Reward: 1.0  | Average Reward 4.21  | Actor loss: -0.14 | Critic loss: 4.59 | Entropy loss: -0.0146  | Total Loss: 4.44 | Total Steps: 139
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7774/200000  | Episode Reward: 1.0  | Average Reward 4.19  | Actor loss: -0.13 | Critic loss: 6.05 | Entropy loss: -0.0093  | Total Loss: 5.92 | Total Steps: 142
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7775/200000  | Episode Reward: 1.0  | Average Reward 4.14  | Actor loss: -0.50 | Critic loss: 4.95 | Entropy loss: -0.0209  | Tota

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7797/200000  | Episode Reward: -2.0  | Average Reward 4.01  | Actor loss: 0.76 | Critic loss: 6.62 | Entropy loss: -0.0062  | Total Loss: 7.37 | Total Steps: 298
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7798/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: 0.71 | Critic loss: 6.08 | Entropy loss: -0.0076  | Total Loss: 6.78 | Total Steps: 78
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7799/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: 0.36 | Critic loss: 4.62 | Entropy loss: -0.0176  | Total Loss: 4.97 | Total Steps: 112
--- target colour: blue, target object: cube ---
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7822/200000  | Episode Reward: 10.0  | Average Reward 4.13  | Actor loss: 0.29 | Critic loss: 4.56 | Entropy loss: -0.0090  | Total Loss: 4.84 | Total Steps: 82
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7823/200000  | Episode Reward: -5.0  | Average Reward 4.14  | Actor loss: 2.37 | Critic loss: 18.84 | Entropy loss: -0.0026  | Total Loss: 21.21 | Total Steps: 264
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7824/200000  | Episode Reward: -3.0  | Average Reward 4.11  | Actor loss: -0.45 | Critic loss: 6.24 | Entropy loss: -0.02

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7847/200000  | Episode Reward: 1.0  | Average Reward 3.96  | Actor loss: -0.27 | Critic loss: 6.27 | Entropy loss: -0.0207  | Total Loss: 5.97 | Total Steps: 130
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7848/200000  | Episode Reward: 4.0  | Average Reward 3.92  | Actor loss: -0.09 | Critic loss: 6.46 | Entropy loss: -0.0075  | Total Loss: 6.37 | Total Steps: 102
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7849/200000  | Episode Reward: 10.0  | Average Reward 4.01  | Actor loss: 0.10 | Critic loss: 2.68 | Entropy loss: -0.0227  | Total Loss: 2.76 | Total Steps: 129
--- target colour: blue, target object: sphere ---
Agent in termina

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7872/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.15 | Critic loss: 3.42 | Entropy loss: -0.0177  | Total Loss: 3.55 | Total Steps: 104
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7873/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.01 | Critic loss: 1.94 | Entropy loss: -0.0200  | Total Loss: 1.93 | Total Steps: 150
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7874/200000  | Episode Reward: 7.0  | Average Reward 4.41  | Actor loss: -0.40 | Critic loss: 2.41 | Entropy loss: -0.0235  | Total Loss: 1.99 | Total Steps: 121
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7875/200000  | Episode Reward: 10.0  | Average R

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7898/200000  | Episode Reward: 10.0  | Average Reward 4.32  | Actor loss: 0.12 | Critic loss: 6.29 | Entropy loss: -0.0011  | Total Loss: 6.41 | Total Steps: 23
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7899/200000  | Episode Reward: -2.0  | Average Reward 4.30  | Actor loss: -0.52 | Critic loss: 6.22 | Entropy loss: -0.0176  | Total Loss: 5.68 | Total Steps: 167
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7900/200000  | Episode Reward: 10.0  | Average Reward 4.35  | Actor loss: 0.50 | Critic loss: 4.56 | Entropy loss: -0.0135  | Total Loss: 5.04 | Total Steps: 87
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7923/200000  | Episode Reward: 10.0  | Average Reward 4.19  | Actor loss: 0.55 | Critic loss: 6.73 | Entropy loss: -0.0033  | Total Loss: 7.27 | Total Steps: 23
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7924/200000  | Episode Reward: -14.0  | Average Reward 4.17  | Actor loss: 0.42 | Critic loss: 3.84 | Entropy loss: -0.0034  | Total Loss: 4.26 | Total Steps: 312
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7925/200000  | Episode Reward: 10.0  | Average Reward 4.22  | Actor loss: 0.68 | Critic loss: 5.96 | Entropy loss: -0.0035  | Total Loss: 6.64 | T

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7948/200000  | Episode Reward: 7.0  | Average Reward 4.02  | Actor loss: -0.21 | Critic loss: 3.99 | Entropy loss: -0.0151  | Total Loss: 3.77 | Total Steps: 120
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7949/200000  | Episode Reward: 10.0  | Average Reward 4.04  | Actor loss: -0.08 | Critic loss: 2.45 | Entropy loss: -0.0386  | Total Loss: 2.34 | Total Steps: 229
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7950/200000  | Episode Reward: 10.0  | Average Reward 4.04  | Actor loss: 0.51 | Critic loss: 7.02 | Entropy loss: -0.0011  | Total Loss: 7.52 | Total Steps: 21
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7951/200000  | Ep

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7974/200000  | Episode Reward: 7.0  | Average Reward 4.22  | Actor loss: -0.23 | Critic loss: 3.27 | Entropy loss: -0.0174  | Total Loss: 3.02 | Total Steps: 137
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7975/200000  | Episode Reward: 9.0  | Average Reward 4.26  | Actor loss: -0.29 | Critic loss: 2.76 | Entropy loss: -0.0160  | Total Loss: 2.45 | Total Steps: 166
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 7976/200000  | Episode Reward: 3.0  | Average Reward 4.49  | Actor loss: -0.37 | Critic loss: 3.57 | Entropy loss: -0.0243  | Total Loss: 3.17 | Total Steps: 167
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decisio

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8000/200000  | Episode Reward: 4.0  | Average Reward 4.52  | Actor loss: -0.24 | Critic loss: 5.53 | Entropy loss: -0.0121  | Total Loss: 5.28 | Total Steps: 136
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8001/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.51 | Critic loss: 4.35 | Entropy loss: -0.0203  | Total Loss: 4.84 | Total Steps: 92
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8002/200000  | Episode Reward: 10.0  | Average Reward 4.59  | Actor loss: 0.39 | Critic loss: 6.16 | Entropy loss: -0.0027  | Total Loss: 6.55 | Total Steps: 23
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8025/200000  | Episode Reward: -2.0  | Average Reward 4.64  | Actor loss: -0.76 | Critic loss: 8.02 | Entropy loss: -0.0199  | Total Loss: 7.23 | Total Steps: 131
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8026/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.21 | Critic loss: 4.52 | Entropy loss: -0.0097  | Total Loss: 4.30 | Total Steps: 87
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8027/200000  | Episode Reward: 0.0  | Average Reward 4.57  | Actor loss: -0.70 | Critic loss: 5.25 | Entropy loss: -0.0208  | Total Los

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8051/200000  | Episode Reward: -14.0  | Average Reward 4.62  | Actor loss: -0.20 | Critic loss: 6.19 | Entropy loss: -0.0050  | Total Loss: 5.99 | Total Steps: 318
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8052/200000  | Episode Reward: 10.0  | Average Reward 4.62  | Actor loss: 0.69 | Critic loss: 4.12 | Entropy loss: -0.0125  | Total Loss: 4.80 | Total Steps: 92
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8053/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: 0.46 | Critic loss: 8.77 | Entropy loss: -0.0025  | Total Loss: 9.22 | Total

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8076/200000  | Episode Reward: 4.0  | Average Reward 4.72  | Actor loss: -0.30 | Critic loss: 5.51 | Entropy loss: -0.0153  | Total Loss: 5.20 | Total Steps: 105
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8077/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: 0.40 | Critic loss: 3.37 | Entropy loss: -0.0138  | Total Loss: 3.76 | Total Steps: 86
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8078/200000  | Episode Reward: 0.0  | Average Reward 4.67  | Actor loss: -0.48 | Critic loss: 4.01 | Entropy loss: -0.0196  | Total Loss: 3.50 | Total Steps: 164
--- target colour: red, target object: prism ---
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8102/200000  | Episode Reward: 7.0  | Average Reward 4.79  | Actor loss: -0.09 | Critic loss: 2.57 | Entropy loss: -0.0287  | Total Loss: 2.45 | Total Steps: 142
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8103/200000  | Episode Reward: 7.0  | Average Reward 4.80  | Actor loss: -0.13 | Critic loss: 2.65 | Entropy loss: -0.0132  | Total Loss: 2.50 | Total Steps: 151
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8104/200000  | Episode Reward: 10.0  | Average Reward 4.88  | Actor loss: 0.86 | Critic loss: 10.67 | Entropy loss: -0.0034  | Total Loss: 11.53 | Total Steps: 26
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8105/200000  | Episod

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8128/200000  | Episode Reward: 10.0  | Average Reward 5.07  | Actor loss: 0.84 | Critic loss: 9.86 | Entropy loss: -0.0018  | Total Loss: 10.70 | Total Steps: 16
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8129/200000  | Episode Reward: 4.0  | Average Reward 5.07  | Actor loss: -0.16 | Critic loss: 3.01 | Entropy loss: -0.0230  | Total Loss: 2.82 | Total Steps: 160
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8130/200000  | Episode Reward: -8.0  | Average Reward 4.99  | Actor loss: -1.11 | Critic loss: 11.77 | Entropy loss: -0.0186  | Total Loss: 10.64 | Total St

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8154/200000  | Episode Reward: 7.0  | Average Reward 5.09  | Actor loss: -0.21 | Critic loss: 3.67 | Entropy loss: -0.0248  | Total Loss: 3.44 | Total Steps: 187
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8155/200000  | Episode Reward: 10.0  | Average Reward 5.16  | Actor loss: 0.35 | Critic loss: 8.12 | Entropy loss: -0.0097  | Total Loss: 8.47 | Total Steps: 52
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8156/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: 1.42 | Critic loss: 9.26 | Entropy loss: -0.0019  | Total Loss: 10.67 | Total Steps: 21
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8181/200000  | Episode Reward: -2.0  | Average Reward 5.28  | Actor loss: -0.63 | Critic loss: 7.88 | Entropy loss: -0.0123  | Total Loss: 7.24 | Total Steps: 145
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8182/200000  | Episode Reward: 4.0  | Average Reward 5.29  | Actor loss: -0.64 | Critic loss: 5.68 | Entropy loss: -0.0247  | Total Loss: 5.02 | Total Steps: 248
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8183/200000  | Episode Reward: 10.0  | Average Reward 5.30  | Actor loss: -0.38 | Critic loss: 2.23 | Entropy loss: -0.0208  | Total Loss: 1.82 | Total Steps: 138
--- target colour: blue, target object: sph

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8205/200000  | Episode Reward: 10.0  | Average Reward 5.15  | Actor loss: -0.18 | Critic loss: 2.13 | Entropy loss: -0.0107  | Total Loss: 1.93 | Total Steps: 112
--- target colour: red, target object: prism ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8206/200000  | Episode Reward: 7.0  | Average Reward 5.15  | Actor loss: 0.10 | Critic loss: 11.37 | Entropy loss: -0.0002  | Total Loss: 11.47 | Total Steps: 258
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8207/200000  | Episode Reward: 4.0  | Average Reward 5.17  | Actor loss: -0.57 | Critic loss: 4.67 | Entropy loss: -0.0150  | Total Loss: 4.09 | Total Steps: 171
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Traini

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8230/200000  | Episode Reward: 10.0  | Average Reward 5.21  | Actor loss: 0.91 | Critic loss: 6.16 | Entropy loss: -0.0157  | Total Loss: 7.06 | Total Steps: 77
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8231/200000  | Episode Reward: 10.0  | Average Reward 5.25  | Actor loss: 0.56 | Critic loss: 6.64 | Entropy loss: -0.0012  | Total Loss: 7.20 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8232/200000  | Episode Reward: -2.0  | Average Reward 5.18  | Actor loss: -0.29 | Critic loss: 5.73 | Entropy loss: -0.0155  | Total Loss: 5.42 | Total Steps: 187
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8254/200000  | Episode Reward: 1.0  | Average Reward 4.62  | Actor loss: -0.59 | Critic loss: 4.86 | Entropy loss: -0.0159  | Total Loss: 4.25 | Total Steps: 149
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8255/200000  | Episode Reward: 7.0  | Average Reward 4.64  | Actor loss: -0.33 | Critic loss: 2.45 | Entropy loss: -0.0203  | Total Loss: 2.10 | Total Steps: 178
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8256/200000  | Episode Reward: 10.0  | Average Reward 4.66  | Actor loss: 0.16 | Critic loss: 4.65 | Entropy loss: -0.0008  | Total Loss: 4.81 | Total Steps: 21
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8279/200000  | Episode Reward: 1.0  | Average Reward 4.67  | Actor loss: -0.71 | Critic loss: 5.66 | Entropy loss: -0.0250  | Total Loss: 4.92 | Total Steps: 167
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8280/200000  | Episode Reward: 7.0  | Average Reward 4.65  | Actor loss: -0.11 | Critic loss: 2.84 | Entropy loss: -0.0104  | Total Loss: 2.72 | Total Steps: 106
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8281/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.29 | Critic loss: 4.74 | Entropy loss: -0.0159  | Total Loss: 4.44 | Total Steps: 226
--- target colour: black, target object: capsule ---
Agent in termina

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8305/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 0.29 | Critic loss: 3.26 | Entropy loss: -0.0011  | Total Loss: 3.55 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8306/200000  | Episode Reward: 10.0  | Average Reward 4.61  | Actor loss: 0.09 | Critic loss: 3.57 | Entropy loss: -0.0005  | Total Loss: 3.66 | Total Steps: 21
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8307/200000  | Episode Reward: 1.0  | Average Reward 4.59  | Actor loss: -0.40 | Critic loss: 5.85 | Entropy loss: -0.0195  | Total Loss: 5.43 | Total Steps: 139
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8329/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.64 | Critic loss: 4.03 | Entropy loss: -0.0016  | Total Loss: 4.66 | Total Steps: 21
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8330/200000  | Episode Reward: 7.0  | Average Reward 4.27  | Actor loss: -0.24 | Critic loss: 3.41 | Entropy loss: -0.0201  | Total Loss: 3.15 | Total Steps: 163
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8331/200000  | Episode Reward: 10.0  | Average Reward 4.27  | Actor loss: 0.40 | Critic loss: 3.53 | Entropy loss: -0.0012  | Total Loss: 3.92 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8353/200000  | Episode Reward: 4.0  | Average Reward 3.71  | Actor loss: -0.03 | Critic loss: 5.44 | Entropy loss: -0.0155  | Total Loss: 5.40 | Total Steps: 121
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8354/200000  | Episode Reward: 4.0  | Average Reward 3.69  | Actor loss: -0.07 | Critic loss: 4.47 | Entropy loss: -0.0157  | Total Loss: 4.38 | Total Steps: 133
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8355/200000  | Episode Reward: 7.0  | Average Reward 3.68  | Actor loss: 0.20 | Critic loss: 8.88 | Entropy loss: -0.0079  | Total Loss: 9.07 | Total Steps: 78
--- target colour: black, target object: cube ---
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8378/200000  | Episode Reward: 7.0  | Average Reward 3.50  | Actor loss: -0.51 | Critic loss: 3.49 | Entropy loss: -0.0209  | Total Loss: 2.95 | Total Steps: 164
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8379/200000  | Episode Reward: 4.0  | Average Reward 3.47  | Actor loss: 0.05 | Critic loss: 5.35 | Entropy loss: -0.0164  | Total Loss: 5.38 | Total Steps: 108
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8380/200000  | Episode Reward: 7.0  | Average Reward 3.46  | Actor loss: -0.09 | Critic loss: 3.66 | Entropy loss: -0.0111  | Total Loss: 3.56 | Total Steps: 117
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8403/200000  | Episode Reward: -11.0  | Average Reward 3.48  | Actor loss: -0.25 | Critic loss: 4.75 | Entropy loss: -0.0098  | Total Loss: 4.49 | Total Steps: 378
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8404/200000  | Episode Reward: 1.0  | Average Reward 3.48  | Actor loss: -0.46 | Critic loss: 6.31 | Entropy loss: -0.0109  | Total Loss: 5.84 | Total Steps: 141
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8405/200000  | Episode Reward: 7.0  | Average Reward 3.47  | Actor loss: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8428/200000  | Episode Reward: 4.0  | Average Reward 3.57  | Actor loss: 0.02 | Critic loss: 4.52 | Entropy loss: -0.0128  | Total Loss: 4.53 | Total Steps: 117
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8429/200000  | Episode Reward: 4.0  | Average Reward 3.57  | Actor loss: -0.11 | Critic loss: 4.24 | Entropy loss: -0.0113  | Total Loss: 4.12 | Total Steps: 125
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8430/200000  | Episode Reward: 10.0  | Average Reward 3.57  | Actor loss: 0.24 | Critic loss: 5.07 | Entropy loss: -0.0021  | Total Loss: 5.30 | Total Steps: 23
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8454/200000  | Episode Reward: 7.0  | Average Reward 4.22  | Actor loss: 0.02 | Critic loss: 5.92 | Entropy loss: -0.0058  | Total Loss: 5.94 | Total Steps: 91
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8455/200000  | Episode Reward: 10.0  | Average Reward 4.24  | Actor loss: 1.40 | Critic loss: 3.58 | Entropy loss: -0.0030  | Total Loss: 4.97 | Total Steps: 18
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8456/200000  | Episode Reward: -2.0  | Average Reward 4.18  | Actor loss: 0.01 | Critic loss: 15.65 | Entropy loss: -0.0001  | Total Loss: 15.67 | Total Steps: 254
--- target colour: blue, target object: capsule ---
Agent in

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8479/200000  | Episode Reward: 4.0  | Average Reward 4.10  | Actor loss: -0.13 | Critic loss: 3.36 | Entropy loss: -0.0165  | Total Loss: 3.21 | Total Steps: 177
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8480/200000  | Episode Reward: 7.0  | Average Reward 4.10  | Actor loss: -0.21 | Critic loss: 3.03 | Entropy loss: -0.0152  | Total Loss: 2.80 | Total Steps: 132
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8481/200000  | Episode Reward: 10.0  | Average Reward 4.13  | Actor loss: 0.55 | Critic loss: 4.22 | Entropy loss: -0.0020  | Total Loss: 4.77 | Total Steps: 18
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8504/200000  | Episode Reward: 7.0  | Average Reward 4.03  | Actor loss: -0.22 | Critic loss: 3.02 | Entropy loss: -0.0181  | Total Loss: 2.78 | Total Steps: 193
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8505/200000  | Episode Reward: 1.0  | Average Reward 3.98  | Actor loss: -0.17 | Critic loss: 4.47 | Entropy loss: -0.0173  | Total Loss: 4.27 | Total Steps: 143
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8506/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.42 | Critic loss: 3.78 | Entropy loss: -0.0151  | Total Loss: 3.35 | Total Steps: 144
--- target colour: blue, target object: s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8529/200000  | Episode Reward: 4.0  | Average Reward 4.30  | Actor loss: -0.66 | Critic loss: 4.17 | Entropy loss: -0.0193  | Total Loss: 3.49 | Total Steps: 137
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8530/200000  | Episode Reward: 4.0  | Average Reward 4.29  | Actor loss: -0.23 | Critic loss: 4.92 | Entropy loss: -0.0131  | Total Loss: 4.68 | Total Steps: 118
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8531/200000  | Episode Reward: 10.0  | Average Reward 4.29  | Actor loss: 0.55 | Critic loss: 9.56 | Entropy loss: -0.0063  | Total Loss: 10.10 | Total Steps: 65
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8555/200000  | Episode Reward: 10.0  | Average Reward 4.96  | Actor loss: 0.02 | Critic loss: 3.60 | Entropy loss: -0.0106  | Total Loss: 3.61 | Total Steps: 103
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8556/200000  | Episode Reward: 4.0  | Average Reward 4.94  | Actor loss: -0.28 | Critic loss: 4.49 | Entropy loss: -0.0174  | Total Loss: 4.19 | Total Steps: 182
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8557/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.32 | Critic loss: 3.47 | Entropy loss: -0.0011  | Total Loss: 3.79 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8558/200000  | Episo

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8579/200000  | Episode Reward: 4.0  | Average Reward 4.74  | Actor loss: -0.21 | Critic loss: 4.70 | Entropy loss: -0.0140  | Total Loss: 4.48 | Total Steps: 110
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8580/200000  | Episode Reward: 4.0  | Average Reward 4.72  | Actor loss: -0.91 | Critic loss: 6.88 | Entropy loss: -0.0114  | Total Loss: 5.97 | Total Steps: 97
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8581/200000  | Episode Reward: -2.0  | Average Reward 4.69  | Actor loss: -0.43 | Critic loss: 8.32 | Entropy loss: -0.0127  | Total Loss: 7.87 | Total Steps

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8604/200000  | Episode Reward: 1.0  | Average Reward 4.75  | Actor loss: -0.27 | Critic loss: 6.30 | Entropy loss: -0.0110  | Total Loss: 6.01 | Total Steps: 110
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8605/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0.06 | Critic loss: 6.00 | Entropy loss: -0.0002  | Total Loss: 6.06 | Total Steps: 17
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8606/200000  | Episode Reward: 10.0  | Average Reward 4.79  | Actor loss: 0.45 | Critic loss: 4.36 | Entropy loss: -0.0016  | Total Loss: 4.82 | Total Steps: 17
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Te

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8629/200000  | Episode Reward: 1.0  | Average Reward 4.45  | Actor loss: -0.23 | Critic loss: 4.79 | Entropy loss: -0.0123  | Total Loss: 4.54 | Total Steps: 156
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8630/200000  | Episode Reward: -2.0  | Average Reward 4.38  | Actor loss: -0.38 | Critic loss: 6.44 | Entropy loss: -0.0245  | Total Loss: 6.03 | Total Steps: 188
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8631/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.31 | Critic loss: 3.23 | Entropy loss: -0.0019  | Total Loss: 3.53 | Total Steps: 18
--- target colou

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8652/200000  | Episode Reward: 7.0  | Average Reward 3.94  | Actor loss: -0.14 | Critic loss: 1.75 | Entropy loss: -0.0160  | Total Loss: 1.60 | Total Steps: 152
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8653/200000  | Episode Reward: 10.0  | Average Reward 3.94  | Actor loss: 0.08 | Critic loss: 3.63 | Entropy loss: -0.0167  | Total Loss: 3.69 | Total Steps: 112
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8654/200000  | Episode Reward: -5.0  | Average Reward 3.88  | Actor loss: -0.49 | Critic loss: 6.14 | Entropy loss: -0.0196  | Total Loss: 5.64 | Total Steps: 166
--- target colour: green, target object: sphere ---
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8677/200000  | Episode Reward: 10.0  | Average Reward 3.94  | Actor loss: 0.33 | Critic loss: 4.32 | Entropy loss: -0.0016  | Total Loss: 4.64 | Total Steps: 19
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8678/200000  | Episode Reward: 10.0  | Average Reward 3.98  | Actor loss: 0.02 | Critic loss: 2.66 | Entropy loss: -0.0157  | Total Loss: 2.67 | Total Steps: 128
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8679/200000  | Episode Reward: 1.0  | Average Reward 3.96  | Actor loss: -0.45 | Critic loss: 5.61 | Entropy loss: -0.0132  | Total Loss: 5.14 | Total Steps: 148
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8703/200000  | Episode Reward: 0.0  | Average Reward 3.97  | Actor loss: -0.61 | Critic loss: 4.25 | Entropy loss: -0.0203  | Total Loss: 3.62 | Total Steps: 143
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8704/200000  | Episode Reward: 10.0  | Average Reward 3.98  | Actor loss: -0.26 | Critic loss: 2.17 | Entropy loss: -0.0194  | Total Loss: 1.89 | Total Steps: 152
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8705/200000  | Episode Reward: 10.0  | Average Reward 4.03  | Actor loss: 0.09 | Critic loss: 6.45 | Entropy loss: -0.0003  | Total Loss: 6.54 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8729/200000  | Episode Reward: 7.0  | Average Reward 4.16  | Actor loss: 0.99 | Critic loss: 7.26 | Entropy loss: -0.0030  | Total Loss: 8.25 | Total Steps: 272
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8730/200000  | Episode Reward: 4.0  | Average Reward 4.16  | Actor loss: -0.49 | Critic loss: 3.66 | Entropy loss: -0.0239  | Total Loss: 3.14 | Total Steps: 192
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8731/200000  | Episode Reward: 10.0  | Average Reward 4.16  | Actor loss: 0.39 | Critic loss: 4.29 | Entropy loss: -0.0013  | Total Loss: 4.68 | Total Steps: 17
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8754/200000  | Episode Reward: 4.0  | Average Reward 3.96  | Actor loss: -0.01 | Critic loss: 5.31 | Entropy loss: -0.0053  | Total Loss: 5.29 | Total Steps: 110
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8755/200000  | Episode Reward: 7.0  | Average Reward 3.94  | Actor loss: -0.16 | Critic loss: 3.63 | Entropy loss: -0.0118  | Total Loss: 3.46 | Total Steps: 166
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8756/200000  | Episode Reward: -2.0  | Average Reward 3.91  | Actor loss: -0.27 | Critic loss: 5.90 | Entropy loss: -0.0192  | Total Loss: 5.61 | Total Steps: 156
--- target colour: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8780/200000  | Episode Reward: 4.0  | Average Reward 4.28  | Actor loss: -0.27 | Critic loss: 5.65 | Entropy loss: -0.0116  | Total Loss: 5.37 | Total Steps: 105
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8781/200000  | Episode Reward: 10.0  | Average Reward 4.34  | Actor loss: -0.04 | Critic loss: 3.04 | Entropy loss: -0.0110  | Total Loss: 2.99 | Total Steps: 108
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8782/200000  | Episode Reward: 4.0  | Average Reward 4.31  | Actor loss: -0.20 | Critic loss: 5.67 | Entropy loss: -0.0174  | Total Loss: 5.45 | Total Steps: 110
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8805/200000  | Episode Reward: -5.0  | Average Reward 4.30  | Actor loss: -0.44 | Critic loss: 7.34 | Entropy loss: -0.0162  | Total Loss: 6.88 | Total Steps: 171
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8806/200000  | Episode Reward: 10.0  | Average Reward 4.30  | Actor loss: 0.47 | Critic loss: 3.77 | Entropy loss: -0.0015  | Total Loss: 4.24 | Total Steps: 16
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8807/200000  | Episode Reward: -2.0  | Average Reward 4.27  | Actor loss: -0.44 | Critic loss: 5.03 | Entropy loss: -0.0180  | Total

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8831/200000  | Episode Reward: 4.0  | Average Reward 4.73  | Actor loss: -0.25 | Critic loss: 5.60 | Entropy loss: -0.0133  | Total Loss: 5.33 | Total Steps: 103
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8832/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0.30 | Critic loss: 2.86 | Entropy loss: -0.0014  | Total Loss: 3.16 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8833/200000  | Episode Reward: -2.0  | Average Reward 4.70  | Actor loss: -0.67 | Critic loss: 7.03 | Entropy loss: -0.0137  | Total Loss: 6.35 | Total Steps: 138
--- target colour: yellow, target object: c

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8856/200000  | Episode Reward: 10.0  | Average Reward 5.13  | Actor loss: 0.33 | Critic loss: 2.86 | Entropy loss: -0.0014  | Total Loss: 3.19 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8857/200000  | Episode Reward: 9.0  | Average Reward 5.12  | Actor loss: -0.15 | Critic loss: 2.98 | Entropy loss: -0.0172  | Total Loss: 2.81 | Total Steps: 160
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8858/200000  | Episode Reward: -2.0  | Average Reward 5.07  | Actor loss: -0.49 | Critic loss: 7.11 | Entropy loss: -0.0209  | Total Loss: 6.60 | Total Steps: 162
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8882/200000  | Episode Reward: 4.0  | Average Reward 5.22  | Actor loss: -0.07 | Critic loss: 5.52 | Entropy loss: -0.0228  | Total Loss: 5.42 | Total Steps: 116
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8883/200000  | Episode Reward: -2.0  | Average Reward 5.21  | Actor loss: -0.59 | Critic loss: 5.03 | Entropy loss: -0.0250  | Total Loss: 4.42 | Total Steps: 245
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8884/200000  | Episode Reward: 6.0  | Average Reward 5.23  | Actor loss: -0.50 | Critic loss: 3.76 | Entropy loss: -0.0258  | Total Loss: 3.23 | Total Steps: 196
--- target colour

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8908/200000  | Episode Reward: -11.0  | Average Reward 5.21  | Actor loss: 0.64 | Critic loss: 3.07 | Entropy loss: -0.0058  | Total Loss: 3.71 | Total Steps: 295
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8909/200000  | Episode Reward: 7.0  | Average Reward 5.21  | Actor loss: -0.15 | Critic loss: 3.42 | Entropy loss: -0.0047  | Total Loss: 3.26 | Total Steps: 107
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8910/200000  | Episode Reward: 1.0  | Average Reward 5.17  | Actor loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8932/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.32 | Critic loss: 3.18 | Entropy loss: -0.0014  | Total Loss: 3.51 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8933/200000  | Episode Reward: 1.0  | Average Reward 4.97  | Actor loss: -0.43 | Critic loss: 6.16 | Entropy loss: -0.0156  | Total Loss: 5.72 | Total Steps: 135
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8934/200000  | Episode Reward: 10.0  | Average Reward 5.02  | Actor loss: 0.60 | Critic loss: 8.70 | Entropy loss: -0.0052  | Total Loss: 9.29 | Total Steps: 56
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episod

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8957/200000  | Episode Reward: -2.0  | Average Reward 5.02  | Actor loss: -0.51 | Critic loss: 4.28 | Entropy loss: -0.0196  | Total Loss: 3.75 | Total Steps: 181
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8958/200000  | Episode Reward: 7.0  | Average Reward 5.04  | Actor loss: 0.17 | Critic loss: 5.21 | Entropy loss: -0.0139  | Total Loss: 5.37 | Total Steps: 96
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8959/200000  | Episode Reward: 1.0  | Average Reward 4.99  | Actor loss: -0.20 | Critic loss: 5.01 | Entropy loss: -0.0170  | Total Loss: 4.79 | Total Steps:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8982/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: -0.10 | Critic loss: 3.25 | Entropy loss: -0.0235  | Total Loss: 3.13 | Total Steps: 116
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8983/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.05 | Critic loss: 5.59 | Entropy loss: -0.0169  | Total Loss: 5.52 | Total Steps: 114
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 8984/200000  | Episode Reward: 1.0  | Average Reward 4.80  | Actor loss: -0.35 | Critic loss: 4.58 | Entropy loss: -0.0132  | Total Loss: 4.21 | Total Steps: 178
--- target colour: blue, target object: sphere ---
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9006/200000  | Episode Reward: 6.0  | Average Reward 4.67  | Actor loss: -0.58 | Critic loss: 3.29 | Entropy loss: -0.0343  | Total Loss: 2.67 | Total Steps: 166
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9007/200000  | Episode Reward: 7.0  | Average Reward 4.71  | Actor loss: -0.17 | Critic loss: 3.96 | Entropy loss: -0.0130  | Total Loss: 3.78 | Total Steps: 140
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9008/200000  | Episode Reward: 7.0  | Average Reward 4.73  | Actor loss: -0.02 | Critic loss: 3.38 | Entropy loss: -0.0131  | Total Loss: 3.34 | Total Steps: 120
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9009/200000  | Episode 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9032/200000  | Episode Reward: 10.0  | Average Reward 4.59  | Actor loss: 0.41 | Critic loss: 3.54 | Entropy loss: -0.0015  | Total Loss: 3.94 | Total Steps: 17
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9033/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.29 | Critic loss: 4.00 | Entropy loss: -0.0173  | Total Loss: 3.69 | Total Steps: 145
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9034/200000  | Episode Reward: -2.0  | Average Reward 4.61  | Actor loss: -0.53 | Critic loss: 6.86 | Entropy loss: -0.0149  | Total Loss: 6.32 | Total Steps: 167
--- target colour: blue, target object: capsule 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9057/200000  | Episode Reward: -3.0  | Average Reward 4.38  | Actor loss: -0.82 | Critic loss: 5.86 | Entropy loss: -0.0233  | Total Loss: 5.01 | Total Steps: 151
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9058/200000  | Episode Reward: 10.0  | Average Reward 4.43  | Actor loss: 0.44 | Critic loss: 4.40 | Entropy loss: -0.0015  | Total Loss: 4.84 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9059/200000  | Episode Reward: 1.0  | Average Reward 4.41  | Actor loss: -0.19 | Critic loss: 5.51 | Entropy loss: -0.0078  | Total Loss: 5.31 | Total Steps: 141
--- target colo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9082/200000  | Episode Reward: 1.0  | Average Reward 4.23  | Actor loss: -0.40 | Critic loss: 3.98 | Entropy loss: -0.0139  | Total Loss: 3.57 | Total Steps: 140
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9083/200000  | Episode Reward: 10.0  | Average Reward 4.29  | Actor loss: 0.52 | Critic loss: 5.78 | Entropy loss: -0.0014  | Total Loss: 6.30 | Total Steps: 17
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9084/200000  | Episode Reward: 7.0  | Average Reward 4.29  | Actor loss: -0.13 | Critic loss: 8.36 | Entropy loss: -0.0111  | Total Loss: 8.22 | Total Steps: 80
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9108/200000  | Episode Reward: 10.0  | Average Reward 4.28  | Actor loss: 0.39 | Critic loss: 5.37 | Entropy loss: -0.0012  | Total Loss: 5.76 | Total Steps: 16
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9109/200000  | Episode Reward: 6.0  | Average Reward 4.27  | Actor loss: -0.14 | Critic loss: 3.02 | Entropy loss: -0.0015  | Total Loss: 2.88 | Total Steps: 289
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9110/200000  | Episode Reward: 4.0  | Average Reward 4.29  | Actor loss: -0.39 | Critic loss: 3.70 | Entropy loss: -0.0203  | Total Loss: 3.29 | Total Steps: 173
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9134/200000  | Episode Reward: 4.0  | Average Reward 4.42  | Actor loss: -0.33 | Critic loss: 3.37 | Entropy loss: -0.0154  | Total Loss: 3.02 | Total Steps: 172
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9135/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.65 | Critic loss: 3.97 | Entropy loss: -0.0033  | Total Loss: 4.62 | Total Steps: 18
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9136/200000  | Episode Reward: 1.0  | Average Reward 4.40  | Actor loss: -0.71 | Critic loss: 7.82 | Entropy loss: -0.0123  | Total Loss: 7.10 | Total Steps: 106
--- target colour: green, target object: sphere ---
Decision Step reward: -3


Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9159/200000  | Episode Reward: 7.0  | Average Reward 4.55  | Actor loss: -0.48 | Critic loss: 3.18 | Entropy loss: -0.0141  | Total Loss: 2.69 | Total Steps: 131
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9160/200000  | Episode Reward: 4.0  | Average Reward 4.55  | Actor loss: -0.25 | Critic loss: 5.49 | Entropy loss: -0.0097  | Total Loss: 5.24 | Total Steps: 141
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9161/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 2.92 | Critic loss: 11.02 | Entropy loss: -0.0121  | Total Loss: 13.93 | Total Steps: 41
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9162/200000  | Epis

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9185/200000  | Episode Reward: 10.0  | Average Reward 4.68  | Actor loss: 0.16 | Critic loss: 2.99 | Entropy loss: -0.0018  | Total Loss: 3.15 | Total Steps: 19
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9186/200000  | Episode Reward: 10.0  | Average Reward 4.71  | Actor loss: 0.17 | Critic loss: 3.82 | Entropy loss: -0.0006  | Total Loss: 4.00 | Total Steps: 17
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9187/200000  | Episode Reward: -5.0  | Average Reward 4.63  | Actor loss: -0.38 | Critic loss: 6.63 | Entropy loss: -0.0083  | Total Loss: 6.24 | Total Steps: 140
--- target colour: yellow, target object: cylinder ---
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9211/200000  | Episode Reward: 7.0  | Average Reward 4.88  | Actor loss: -0.50 | Critic loss: 4.89 | Entropy loss: -0.0183  | Total Loss: 4.37 | Total Steps: 137
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9212/200000  | Episode Reward: -2.0  | Average Reward 4.87  | Actor loss: -0.46 | Critic loss: 6.90 | Entropy loss: -0.0160  | Total Loss: 6.42 | Total Steps: 173
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9213/200000  | Episode Reward: 10.0  | Average Reward 4.87  | Actor loss: 0.26 | Critic loss: 2.66 | Entropy loss: -0.0015  | Total Loss: 2.91 | Total Steps: 18
--- target colour: green, target object: capsule ---
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9236/200000  | Episode Reward: 10.0  | Average Reward 4.84  | Actor loss: 0.00 | Critic loss: 2.81 | Entropy loss: -0.0208  | Total Loss: 2.79 | Total Steps: 124
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9237/200000  | Episode Reward: 1.0  | Average Reward 4.91  | Actor loss: -0.59 | Critic loss: 5.22 | Entropy loss: -0.0246  | Total Loss: 4.61 | Total Steps: 162
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9238/200000  | Episode Reward: -2.0  | Average Reward 4.86  | Actor loss: -0.72 | Critic loss: 7.00 | Entropy loss: -0.0199  | Total Loss: 6.26 | Total Steps: 191
--- target colour: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9262/200000  | Episode Reward: 1.0  | Average Reward 5.04  | Actor loss: -0.46 | Critic loss: 6.73 | Entropy loss: -0.0147  | Total Loss: 6.25 | Total Steps: 136
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9263/200000  | Episode Reward: 10.0  | Average Reward 5.07  | Actor loss: 0.10 | Critic loss: 5.84 | Entropy loss: -0.0003  | Total Loss: 5.94 | Total Steps: 17
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9264/200000  | Episode Reward: 4.0  | Average Reward 5.04  | Actor loss: -0.29 | Critic loss: 5.20 | Entropy loss: -0.0163  | Total Loss: 4.90 | Total Steps: 115
--- target colour: black, target object: cylinder ---
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.505. Model has been saved-----
Training  | Episode: 9289/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: -0.06 | Critic loss: 2.60 | Entropy loss: -0.0122  | Total Loss: 2.53 | Total Steps: 136
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9290/200000  | Episode Reward: 2.0  | Average Reward 5.46  | Actor loss: -0.48 | Critic loss: 4.84 | Entropy loss: -0.0194  | Total Loss: 4.34 | Total Steps: 186
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9291/200000  | Episode Reward: 1.0  | Average Reward 5.46  | Actor loss: -0.39 | Critic loss: 6.41 | Entropy loss: -0.0103  | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9314/200000  | Episode Reward: 10.0  | Average Reward 5.30  | Actor loss: 0.42 | Critic loss: 4.00 | Entropy loss: -0.0012  | Total Loss: 4.42 | Total Steps: 16
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9315/200000  | Episode Reward: -14.0  | Average Reward 5.18  | Actor loss: 1.15 | Critic loss: 7.28 | Entropy loss: -0.0048  | Total Loss: 8.42 | Total Steps: 284
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9316/200000  | Episode Reward: 1.0  | Average Reward 5.14  | Actor lo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9339/200000  | Episode Reward: 10.0  | Average Reward 5.25  | Actor loss: 0.41 | Critic loss: 4.50 | Entropy loss: -0.0011  | Total Loss: 4.91 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9340/200000  | Episode Reward: -2.0  | Average Reward 5.21  | Actor loss: -0.43 | Critic loss: 6.79 | Entropy loss: -0.0138  | Total Loss: 6.35 | Total Steps: 148
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9341/200000  | Episode Reward: 4.0  | Average Reward 5.20  | Actor loss: -0.14 | Critic loss: 4.40 | Entropy loss: -0.0078  | Total Loss: 4.25 | Total Steps: 111
--- target colour: black, target object: cyli

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9364/200000  | Episode Reward: 4.0  | Average Reward 5.03  | Actor loss: -0.43 | Critic loss: 4.61 | Entropy loss: -0.0133  | Total Loss: 4.17 | Total Steps: 133
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9365/200000  | Episode Reward: -2.0  | Average Reward 4.98  | Actor loss: -0.54 | Critic loss: 5.37 | Entropy loss: -0.0166  | Total Loss: 4.81 | Total Steps: 186
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9366/200000  | Episode Reward: 4.0  | Average Reward 4.98  | Actor loss: -0.32 | Critic loss: 5.24 | Entropy loss: -0.0086  | Total Loss: 4.91 | Total Steps: 128

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9389/200000  | Episode Reward: 1.0  | Average Reward 5.00  | Actor loss: -0.40 | Critic loss: 4.39 | Entropy loss: -0.0158  | Total Loss: 3.97 | Total Steps: 164
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9390/200000  | Episode Reward: 7.0  | Average Reward 5.08  | Actor loss: -0.06 | Critic loss: 3.52 | Entropy loss: -0.0130  | Total Loss: 3.45 | Total Steps: 106
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9391/200000  | Episode Reward: 10.0  | Average Reward 5.09  | Actor loss: 0.51 | Critic loss: 5.04 | Entropy loss: -0.0013  | Total Loss: 5.55 | Total Steps: 16
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9415/200000  | Episode Reward: 10.0  | Average Reward 5.01  | Actor loss: 0.67 | Critic loss: 3.38 | Entropy loss: -0.0028  | Total Loss: 4.05 | Total Steps: 18
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9416/200000  | Episode Reward: 10.0  | Average Reward 5.05  | Actor loss: 0.11 | Critic loss: 3.34 | Entropy loss: -0.0193  | Total Loss: 3.43 | Total Steps: 109
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9417/200000  | Episode Reward: 10.0  | Average Reward 5.10  | Actor loss: -0.09 | Critic loss: 2.46 | Entropy loss: -0.0143  | Total Loss: 2.36 | Total Steps: 139
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9418/200000  | Episode 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9441/200000  | Episode Reward: 1.0  | Average Reward 5.05  | Actor loss: -0.33 | Critic loss: 5.70 | Entropy loss: -0.0096  | Total Loss: 5.35 | Total Steps: 150
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9442/200000  | Episode Reward: -14.0  | Average Reward 4.97  | Actor loss: -0.88 | Critic loss: 10.08 | Entropy loss: -0.0279  | Total Loss: 9.17 | Total Steps: 243
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9443/200000  | Episode Reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9465/200000  | Episode Reward: 4.0  | Average Reward 4.86  | Actor loss: -0.07 | Critic loss: 4.66 | Entropy loss: -0.0084  | Total Loss: 4.58 | Total Steps: 148
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9466/200000  | Episode Reward: 10.0  | Average Reward 4.86  | Actor loss: 0.24 | Critic loss: 4.19 | Entropy loss: -0.0007  | Total Loss: 4.42 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9467/200000  | Episode Reward: -5.0  | Average Reward 4.78  | Actor loss: -0.49 | Critic loss: 6.60 | Entropy loss: -0.0147  | Total Loss: 6.09 | Total Steps: 247
--- target colour: yellow, target object: sphere ---
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9490/200000  | Episode Reward: 10.0  | Average Reward 4.45  | Actor loss: 1.17 | Critic loss: 3.88 | Entropy loss: -0.0026  | Total Loss: 5.06 | Total Steps: 18
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9491/200000  | Episode Reward: -2.0  | Average Reward 4.43  | Actor loss: -0.64 | Critic loss: 6.55 | Entropy loss: -0.0228  | Total Loss: 5.88 | Total Steps: 164
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9492/200000  | Episode Reward: -5.0  | Average Reward 4.36  | Actor loss: 0.98 | Critic loss: 4.79 | Entropy loss: -0.0059  | 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9515/200000  | Episode Reward: 10.0  | Average Reward 4.56  | Actor loss: 1.45 | Critic loss: 4.04 | Entropy loss: -0.0031  | Total Loss: 5.49 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9516/200000  | Episode Reward: 10.0  | Average Reward 4.61  | Actor loss: 0.57 | Critic loss: 3.56 | Entropy loss: -0.0017  | Total Loss: 4.13 | Total Steps: 21
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9517/200000  | Episode Reward: 10.0  | Average Reward 4.65  | Actor loss: 0.09 | Critic loss: 4.80 | Entropy loss: -0.0023  | Total Loss: 4.89 | Total Steps: 23
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9518/200000  | Episode Reward: 10.0  | Average Reward 4.67  | Actor loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9541/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.24 | Critic loss: 4.41 | Entropy loss: -0.0135  | Total Loss: 4.16 | Total Steps: 153
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9542/200000  | Episode Reward: 1.0  | Average Reward 4.71  | Actor loss: -0.47 | Critic loss: 5.23 | Entropy loss: -0.0136  | Total Loss: 4.75 | Total Steps: 143
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9543/200000  | Episode Reward: -2.0  | Average Reward 4.68  | Actor loss: -0.59 | Critic loss: 6.72 | Entropy loss: -0.0130  | Total Loss: 6.12 | Total Steps: 155
--- target colour:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9566/200000  | Episode Reward: 7.0  | Average Reward 4.73  | Actor loss: -0.33 | Critic loss: 6.82 | Entropy loss: -0.0085  | Total Loss: 6.49 | Total Steps: 101
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9567/200000  | Episode Reward: 7.0  | Average Reward 4.71  | Actor loss: -0.09 | Critic loss: 7.35 | Entropy loss: -0.0132  | Total Loss: 7.25 | Total Steps: 100
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9568/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: 0.00 | Critic loss: 2.63 | Entropy loss: -0.0111  | Total Loss: 2.62 | Total Steps: 118
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9569/200000  | Episo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9592/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 0.19 | Critic loss: 2.66 | Entropy loss: -0.0015  | Total Loss: 2.85 | Total Steps: 18
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9593/200000  | Episode Reward: -2.0  | Average Reward 4.84  | Actor loss: -0.82 | Critic loss: 7.92 | Entropy loss: -0.0151  | Total Loss: 7.09 | Total Steps: 144
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9594/200000  | Episode Reward: 10.0  | Average Reward 4.88  | Actor loss: 0.22 | Critic loss: 4.27 | Entropy loss: -0.0007  | Total Loss: 4.49 | Total Steps: 17
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9616/200000  | Episode Reward: 10.0  | Average Reward 4.45  | Actor loss: 0.35 | Critic loss: 3.66 | Entropy loss: -0.0013  | Total Loss: 4.01 | Total Steps: 16
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9617/200000  | Episode Reward: 4.0  | Average Reward 4.42  | Actor loss: -0.39 | Critic loss: 3.87 | Entropy loss: -0.0143  | Total Loss: 3.47 | Total Steps: 134
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9618/200000  | Episode Reward: -5.0  | Average Reward 4.37  | Actor loss: 2.25 | Critic loss: 17.36 | Entropy loss: -0.0020  | Total Loss: 19.61 | Total Steps: 261
--- targe

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9641/200000  | Episode Reward: 1.0  | Average Reward 4.42  | Actor loss: -0.28 | Critic loss: 4.61 | Entropy loss: -0.0181  | Total Loss: 4.31 | Total Steps: 120
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9642/200000  | Episode Reward: -2.0  | Average Reward 4.47  | Actor loss: -0.63 | Critic loss: 4.98 | Entropy loss: -0.0255  | Total Loss: 4.32 | Total Steps: 226
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9643/200000  | Episode Reward: 10.0  | Average Reward 4.50  | Actor loss: 0.15 | Critic loss: 4.27 | Entropy loss: -0.0134  | Total Loss: 4.40 | Total Steps: 103
--- target colour:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9666/200000  | Episode Reward: -5.0  | Average Reward 4.49  | Actor loss: -0.46 | Critic loss: 6.93 | Entropy loss: -0.0245  | Total Loss: 6.45 | Total Steps: 159
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9667/200000  | Episode Reward: -14.0  | Average Reward 4.45  | Actor loss: 0.20 | Critic loss: 7.08 | Entropy loss: -0.0131  | Total Loss: 7.27 | Total Steps: 318
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termi

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9691/200000  | Episode Reward: 1.0  | Average Reward 4.67  | Actor loss: -0.57 | Critic loss: 4.37 | Entropy loss: -0.0231  | Total Loss: 3.77 | Total Steps: 170
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9692/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: 0.75 | Critic loss: 5.21 | Entropy loss: -0.0015  | Total Loss: 5.96 | Total Steps: 18
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9693/200000  | Episode Reward: -2.0  | Average Reward 4.71  | Actor loss: -0.33 | Critic loss: 6.36 | Entropy loss: -0.0130  | Total Loss: 6.01 | Total Steps: 160
--- target colour: black, target object: cap

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9716/200000  | Episode Reward: 4.0  | Average Reward 4.59  | Actor loss: -0.22 | Critic loss: 5.94 | Entropy loss: -0.0148  | Total Loss: 5.71 | Total Steps: 110
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9717/200000  | Episode Reward: 4.0  | Average Reward 4.57  | Actor loss: -0.35 | Critic loss: 6.45 | Entropy loss: -0.0102  | Total Loss: 6.08 | Total Steps: 117
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9718/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: -0.37 | Critic loss: 5.09 | Entropy loss: -0.0277  | Total Loss: 4.69 | Total Steps: 185
--- target colour: yellow, target object: cylinde

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9741/200000  | Episode Reward: 10.0  | Average Reward 4.45  | Actor loss: -0.06 | Critic loss: 3.88 | Entropy loss: -0.0078  | Total Loss: 3.81 | Total Steps: 100
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9742/200000  | Episode Reward: 4.0  | Average Reward 4.46  | Actor loss: -0.21 | Critic loss: 5.77 | Entropy loss: -0.0094  | Total Loss: 5.55 | Total Steps: 101
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9743/200000  | Episode Reward: 10.0  | Average Reward 4.52  | Actor loss: 0.76 | Critic loss: 5.03 | Entropy loss: -0.0030  | Total Loss: 5.79 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9766/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.30 | Critic loss: 5.56 | Entropy loss: -0.0084  | Total Loss: 5.24 | Total Steps: 156
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9767/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: 0.41 | Critic loss: 4.88 | Entropy loss: -0.0019  | Total Loss: 5.29 | Total Steps: 18
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9768/200000  | Episode Reward: 1.0  | Average Reward 4.47  | Actor loss: -0.42 | Critic loss: 4.50 | Entropy loss: -0.0233  | Total Loss: 4.05 | Total Steps: 153
--- target colour: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9791/200000  | Episode Reward: 1.0  | Average Reward 4.16  | Actor loss: -0.39 | Critic loss: 5.14 | Entropy loss: -0.0159  | Total Loss: 4.73 | Total Steps: 174
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9792/200000  | Episode Reward: 10.0  | Average Reward 4.16  | Actor loss: -0.17 | Critic loss: 3.96 | Entropy loss: -0.0126  | Total Loss: 3.77 | Total Steps: 101
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9793/200000  | Episode Reward: 4.0  | Average Reward 4.19  | Actor loss: -0.08 | Critic loss: 4.70 | Entropy loss: -0.0196  | Total Loss: 4.59 | Total Steps: 114
--- target colour: green, target object: cylinder ---
Agent in t

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9816/200000  | Episode Reward: 4.0  | Average Reward 4.39  | Actor loss: -0.59 | Critic loss: 5.00 | Entropy loss: -0.0154  | Total Loss: 4.39 | Total Steps: 176
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9817/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 1.07 | Critic loss: 3.07 | Entropy loss: -0.0021  | Total Loss: 4.14 | Total Steps: 18
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9818/200000  | Episode Reward: 4.0  | Average Reward 4.46  | Actor loss: 0.19 | Critic loss: 6.17 | Entropy loss: -0.0160  | Total Loss: 6.34 | Total Steps: 108
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9841/200000  | Episode Reward: -12.0  | Average Reward 4.34  | Actor loss: -0.22 | Critic loss: 4.60 | Entropy loss: -0.0123  | Total Loss: 4.37 | Total Steps: 371
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9842/200000  | Episode Reward: -2.0  | Average Reward 4.34  | Actor loss: -0.85 | Critic loss: 7.24 | Entropy loss: -0.0192  | Total Loss: 6.38 | Total Steps: 149
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9843/200000  | Episode Reward: 4.0  | Averag

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9865/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.41 | Critic loss: 4.64 | Entropy loss: -0.0011  | Total Loss: 5.05 | Total Steps: 17
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9866/200000  | Episode Reward: 7.0  | Average Reward 4.25  | Actor loss: -0.13 | Critic loss: 3.16 | Entropy loss: -0.0213  | Total Loss: 3.00 | Total Steps: 164
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9867/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: -0.03 | Critic loss: 3.93 | Entropy loss: -0.0062  | Total Loss: 3.89 | Total Steps: 94
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9868/200000  | Episode Reward: 10.0  | Average Rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9890/200000  | Episode Reward: 4.0  | Average Reward 4.12  | Actor loss: 0.11 | Critic loss: 4.24 | Entropy loss: -0.0159  | Total Loss: 4.33 | Total Steps: 115
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9891/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.68 | Critic loss: 3.05 | Entropy loss: -0.0024  | Total Loss: 3.72 | Total Steps: 18
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9892/200000  | Episode Reward: 7.0  | Average Reward 4.15  | Actor loss: 0.10 | Critic loss: 7.55 | Entropy loss: -0.0130  | Total Loss: 7.64 | Total Steps: 100
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9915/200000  | Episode Reward: -2.0  | Average Reward 4.17  | Actor loss: -0.89 | Critic loss: 3.93 | Entropy loss: -0.0312  | Total Loss: 3.01 | Total Steps: 178
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9916/200000  | Episode Reward: 10.0  | Average Reward 4.20  | Actor loss: 0.82 | Critic loss: 3.43 | Entropy loss: -0.0034  | Total Loss: 4.24 | Total Steps: 18
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9917/200000  | Episode Reward: 4.0  | Average Reward 4.20  | Actor loss: -0.44 | Critic loss: 4.23 | Entropy loss: -0.0228  | Total Loss: 3.77 | Total Steps: 172
--- target colour: blue, target object: cube -

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9940/200000  | Episode Reward: 1.0  | Average Reward 4.05  | Actor loss: -0.34 | Critic loss: 4.98 | Entropy loss: -0.0154  | Total Loss: 4.62 | Total Steps: 165
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9941/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: 0.33 | Critic loss: 3.28 | Entropy loss: -0.0014  | Total Loss: 3.61 | Total Steps: 17
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9942/200000  | Episode Reward: 7.0  | Average Reward 4.07  | Actor loss: -0.45 | Critic loss: 5.27 | Entropy loss: -0.0121  | Total Loss: 4.81 | Total Steps: 126
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9965/200000  | Episode Reward: 1.0  | Average Reward 4.12  | Actor loss: -0.50 | Critic loss: 4.50 | Entropy loss: -0.0208  | Total Loss: 3.98 | Total Steps: 164
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9966/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.33 | Critic loss: 3.18 | Entropy loss: -0.0015  | Total Loss: 3.51 | Total Steps: 17
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9967/200000  | Episode Reward: 4.0  | Average Reward 4.14  | Actor loss: -0.19 | Critic loss: 3.09 | Entropy loss: -0.0193  | Total Loss: 2.89 | Total Steps: 159
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9989/200000  | Episode Reward: -2.0  | Average Reward 4.04  | Actor loss: -0.49 | Critic loss: 6.28 | Entropy loss: -0.0161  | Total Loss: 5.77 | Total Steps: 159
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 9990/200000  | Episode Reward: 10.0  | Average Reward 4.08  | Actor loss: 0.14 | Critic loss: 4.27 | Entropy loss: -0.0095  | Total Loss: 4.40 | Total Steps: 102
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 9991/200000  | Episode Reward: -37.0  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10014/200000  | Episode Reward: 10.0  | Average Reward 3.98  | Actor loss: 0.29 | Critic loss: 4.12 | Entropy loss: -0.0015  | Total Loss: 4.41 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10015/200000  | Episode Reward: 1.0  | Average Reward 3.96  | Actor loss: -0.31 | Critic loss: 4.21 | Entropy loss: -0.0146  | Total Loss: 3.88 | Total Steps: 148
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10016/200000  | Episode Reward: -11.0  | Average Reward 3.89  | Actor loss: -0.79 | Critic loss: 6.40 | Ent

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10039/200000  | Episode Reward: 10.0  | Average Reward 3.92  | Actor loss: 0.08 | Critic loss: 5.45 | Entropy loss: -0.0002  | Total Loss: 5.52 | Total Steps: 17
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10040/200000  | Episode Reward: 10.0  | Average Reward 3.94  | Actor loss: 0.40 | Critic loss: 2.74 | Entropy loss: -0.0012  | Total Loss: 3.14 | Total Steps: 16
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10041/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: -0.12 | Critic loss: 2.93 | Entropy loss: -0.0182  | Total Loss: 2.79 | Total Steps: 145
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10042/200000  | Episode Reward: 10.0  | Average Reward 4.12  | Ac

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10064/200000  | Episode Reward: 10.0  | Average Reward 4.10  | Actor loss: 0.76 | Critic loss: 9.55 | Entropy loss: -0.0071  | Total Loss: 10.30 | Total Steps: 65
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10065/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: -0.11 | Critic loss: 2.85 | Entropy loss: -0.0159  | Total Loss: 2.72 | Total Steps: 180
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10066/200000  | Episode Reward: 4.0  | Average Reward 4.07  | Actor loss: 0.08 | Critic loss: 4.89 | Entropy loss: -0.0151  | Total Loss: 4.95 | Total Steps: 124
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10090/200000  | Episode Reward: 10.0  | Average Reward 4.32  | Actor loss: 0.62 | Critic loss: 2.64 | Entropy loss: -0.0024  | Total Loss: 3.26 | Total Steps: 17
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10091/200000  | Episode Reward: 4.0  | Average Reward 4.29  | Actor loss: -0.50 | Critic loss: 4.78 | Entropy loss: -0.0124  | Total Loss: 4.27 | Total Steps: 134
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10092/200000  | Episode Reward: 10.0  | Average Reward 4.30  | Actor loss: 0.68 | Critic loss: 4.29 | Entropy loss: -0.0012  | Total Loss: 4.97 | Total Steps: 18
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10117/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: -0.60 | Critic loss: 4.40 | Entropy loss: -0.0074  | Total Loss: 3.79 | Total Steps: 90
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10118/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: -0.27 | Critic loss: 5.23 | Entropy loss: -0.0099  | Total Loss: 4.95 | Total Steps: 106
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10119/200000  | Episode Reward: 7.0  | Average Reward 4.68  | Actor loss: -0.51 | Critic loss: 4.29 | Entropy loss: -0.0122  | Total Loss: 3.78 | Total Steps: 99
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10120/200000  | Episode Reward: 9.0  | Averag

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10142/200000  | Episode Reward: -3.0  | Average Reward 4.60  | Actor loss: -0.96 | Critic loss: 6.32 | Entropy loss: -0.0321  | Total Loss: 5.33 | Total Steps: 174
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10143/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: -0.10 | Critic loss: 2.87 | Entropy loss: -0.0112  | Total Loss: 2.75 | Total Steps: 100
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10144/200000  | Episode Reward: 10.0  | Average Reward 4.63  | Actor loss: -0.65 | Critic loss: 6.18 | Entropy loss: -0.0099  | Total Loss: 5.52 | Total Steps: 90
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10168/200000  | Episode Reward: 1.0  | Average Reward 4.70  | Actor loss: -0.40 | Critic loss: 4.69 | Entropy loss: -0.0172  | Total Loss: 4.27 | Total Steps: 159
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10169/200000  | Episode Reward: 1.0  | Average Reward 4.70  | Actor loss: -0.23 | Critic loss: 6.75 | Entropy loss: -0.0153  | Total Loss: 6.50 | Total Steps: 158
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10170/200000  | Episode Reward: 6.0  | Average Reward 4.71  | Actor loss: -0.39 | Critic loss: 3.36 | Entropy loss: -0.0274  | Total Loss: 2.93 | Total Steps: 198
--- target co

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10193/200000  | Episode Reward: 10.0  | Average Reward 5.33  | Actor loss: 0.31 | Critic loss: 4.38 | Entropy loss: -0.0012  | Total Loss: 4.68 | Total Steps: 17
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10194/200000  | Episode Reward: 4.0  | Average Reward 5.33  | Actor loss: -0.11 | Critic loss: 5.58 | Entropy loss: -0.0101  | Total Loss: 5.46 | Total Steps: 98
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10195/200000  | Episode Reward: 10.0  | Average Reward 5.35  | Actor loss: -0.04 | Critic loss: 3.16 | Entropy loss: -0.0091  | Total Loss: 3.11 | Total Steps: 101
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10219/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: -0.14 | Critic loss: 3.86 | Entropy loss: -0.0094  | Total Loss: 3.71 | Total Steps: 101
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10220/200000  | Episode Reward: 1.0  | Average Reward 5.46  | Actor loss: -0.37 | Critic loss: 6.91 | Entropy loss: -0.0096  | Total Loss: 6.53 | Total Steps: 113
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10221/200000  | Episode Reward: 4.0  | Average Reward 5.44  | Actor loss: -0.28 | Critic loss: 3.62 | Entropy loss: -0.0192  | Total Loss: 3.32 | Total Steps: 159
--- target colour: black, target object: cube ---
Agent in terminal 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10244/200000  | Episode Reward: 7.0  | Average Reward 5.49  | Actor loss: -0.15 | Critic loss: 4.43 | Entropy loss: -0.0099  | Total Loss: 4.26 | Total Steps: 112
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10245/200000  | Episode Reward: -11.0  | Average Reward 5.38  | Actor loss: 0.66 | Critic loss: 5.25 | Entropy loss: -0.0033  | Total Loss: 5.91 | Total Steps: 280
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10246/200000  | Episode Reward: 10.0  | Average Reward 5.42  | Actor loss: 0.52 | Critic loss: 3.92 | Entropy loss: -0.0020  | Total Loss: 4.44 | 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10269/200000  | Episode Reward: 1.0  | Average Reward 5.58  | Actor loss: -0.66 | Critic loss: 5.66 | Entropy loss: -0.0184  | Total Loss: 4.98 | Total Steps: 165
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10270/200000  | Episode Reward: 4.0  | Average Reward 5.57  | Actor loss: -0.10 | Critic loss: 4.75 | Entropy loss: -0.0126  | Total Loss: 4.64 | Total Steps: 158
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10271/200000  | Episode Reward: 4.0  | Average Reward 5.54  | Actor loss: -0.27 | Critic loss: 5.15 | Entropy loss: -0.0128  | Total Loss: 4.87 | Total Steps: 107
--- target co

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10295/200000  | Episode Reward: 4.0  | Average Reward 5.61  | Actor loss: 0.04 | Critic loss: 15.66 | Entropy loss: -0.0002  | Total Loss: 15.70 | Total Steps: 251
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10296/200000  | Episode Reward: 10.0  | Average Reward 5.64  | Actor loss: 0.13 | Critic loss: 2.41 | Entropy loss: -0.0011  | Total Loss: 2.54 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10297/200000  | Episode Reward: -8.0  | Average Reward 5.58  | Actor loss: -1.00 | Critic loss: 8.62 | Entropy loss: -0.0275  | Total Loss: 7.60 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10321/200000  | Episode Reward: -5.0  | Average Reward 5.30  | Actor loss: -0.57 | Critic loss: 6.98 | Entropy loss: -0.0147  | Total Loss: 6.40 | Total Steps: 175
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10322/200000  | Episode Reward: 10.0  | Average Reward 5.33  | Actor loss: 0.73 | Critic loss: 5.31 | Entropy loss: -0.0044  | Total Loss: 6.03 | Total Steps: 23
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10323/200000  | Episode Reward: 10.0  | Average Reward 5.38  | Actor loss: 0.14 | Critic loss: 2.18 | Entropy loss: -0.0010  | Total Loss: 2.32 | Total Steps: 21
--- target colour: blue, target object: sphere ---
Decision Step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10346/200000  | Episode Reward: 4.0  | Average Reward 5.43  | Actor loss: -0.39 | Critic loss: 3.97 | Entropy loss: -0.0113  | Total Loss: 3.57 | Total Steps: 171
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10347/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: 0.27 | Critic loss: 3.95 | Entropy loss: -0.0012  | Total Loss: 4.22 | Total Steps: 16
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10348/200000  | Episode Reward: -2.0  | Average Reward 5.41  | Actor loss: -0.21 | Critic loss: 6.77 | Entropy loss: -0.0096  | Total Loss: 6.55 | Total Steps: 165
--- target colour: green, target object: capsul

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10371/200000  | Episode Reward: 8.0  | Average Reward 5.37  | Actor loss: -0.40 | Critic loss: 3.40 | Entropy loss: -0.0208  | Total Loss: 2.97 | Total Steps: 177
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10372/200000  | Episode Reward: 7.0  | Average Reward 5.40  | Actor loss: -0.32 | Critic loss: 3.38 | Entropy loss: -0.0215  | Total Loss: 3.03 | Total Steps: 165
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10373/200000  | Episode Reward: 4.0  | Average Reward 5.37  | Actor loss: -0.28 | Critic loss: 5.80 | Entropy loss: -0.0069  | Total Loss: 5.51 | Total Steps: 129
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10397/200000  | Episode Reward: 10.0  | Average Reward 5.42  | Actor loss: 1.04 | Critic loss: 2.95 | Entropy loss: -0.0021  | Total Loss: 3.99 | Total Steps: 19
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10398/200000  | Episode Reward: 4.0  | Average Reward 5.42  | Actor loss: -0.60 | Critic loss: 5.59 | Entropy loss: -0.0147  | Total Loss: 4.98 | Total Steps: 105
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10399/200000  | Episode Reward: 4.0  | Average Reward 5.45  | Actor loss: -0.41 | Critic loss: 5.81 | Entropy loss: -0.0128  | Total Loss: 5.38 | Total Steps: 139
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10423/200000  | Episode Reward: 10.0  | Average Reward 5.36  | Actor loss: 0.17 | Critic loss: 4.73 | Entropy loss: -0.0117  | Total Loss: 4.89 | Total Steps: 106
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10424/200000  | Episode Reward: 10.0  | Average Reward 5.38  | Actor loss: 0.36 | Critic loss: 3.40 | Entropy loss: -0.0011  | Total Loss: 3.75 | Total Steps: 16
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10425/200000  | Episode Reward: 10.0  | Average Reward 5.38  | Actor loss: 0.70 | Critic loss: 4.04 | Entropy loss: -0.0012  | Total Loss: 4.74 | Total Steps: 18
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10426/200000  | Episode Reward: 10.0  | Average Reward 5.41  | Actor loss

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10450/200000  | Episode Reward: -2.0  | Average Reward 5.49  | Actor loss: -0.63 | Critic loss: 7.89 | Entropy loss: -0.0170  | Total Loss: 7.24 | Total Steps: 181
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10451/200000  | Episode Reward: 2.0  | Average Reward 5.60  | Actor loss: -0.87 | Critic loss: 7.01 | Entropy loss: -0.0223  | Total Loss: 6.12 | Total Steps: 182
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.69. Model has been saved-----
Training  | Episode: 10452/200000  | Episode Reward: 10.0  | Average Reward 5.69  | Actor loss: 0.08 | Critic loss: 2.75 | Entr

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10474/200000  | Episode Reward: 7.0  | Average Reward 5.49  | Actor loss: -0.11 | Critic loss: 4.35 | Entropy loss: -0.0102  | Total Loss: 4.23 | Total Steps: 125
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10475/200000  | Episode Reward: 10.0  | Average Reward 5.57  | Actor loss: 0.61 | Critic loss: 3.95 | Entropy loss: -0.0015  | Total Loss: 4.56 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10476/200000  | Episode Reward: 10.0  | Average Reward 5.57  | Actor loss: -0.02 | Critic loss: 2.67 | Entropy loss: -0.0143  | Total Loss: 2.63 | Total Steps: 180
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10499/200000  | Episode Reward: -2.0  | Average Reward 5.18  | Actor loss: -0.39 | Critic loss: 8.05 | Entropy loss: -0.0154  | Total Loss: 7.64 | Total Steps: 191
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10500/200000  | Episode Reward: 10.0  | Average Reward 5.20  | Actor loss: 0.64 | Critic loss: 4.78 | Entropy loss: -0.0020  | Total Loss: 5.42 | Total Steps: 17
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10501/200000  | Episode Reward: 10.0  | Average Reward 5.21  | Actor loss: 0.23 | Critic loss: 3.25 | Entropy loss: -0.0165  | Total Loss: 3.47 | Total Steps: 110
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10525/200000  | Episode Reward: 10.0  | Average Reward 5.33  | Actor loss: 0.35 | Critic loss: 4.09 | Entropy loss: -0.0011  | Total Loss: 4.44 | Total Steps: 16
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10526/200000  | Episode Reward: 7.0  | Average Reward 5.31  | Actor loss: -0.11 | Critic loss: 3.23 | Entropy loss: -0.0112  | Total Loss: 3.11 | Total Steps: 146
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10527/200000  | Episode Reward: 7.0  | Average Reward 5.29  | Actor loss: -0.00 | Critic loss: 2.86 | Entropy loss: -0.0111  | Total Loss: 2.85 | Total Steps: 114
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  |

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10551/200000  | Episode Reward: 7.0  | Average Reward 5.50  | Actor loss: -0.29 | Critic loss: 4.40 | Entropy loss: -0.0085  | Total Loss: 4.10 | Total Steps: 114
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10552/200000  | Episode Reward: 1.0  | Average Reward 5.51  | Actor loss: -0.40 | Critic loss: 4.30 | Entropy loss: -0.0171  | Total Loss: 3.89 | Total Steps: 174
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10553/200000  | Episode Reward: 4.0  | Average Reward 5.53  | Actor loss: -0.29 | Critic loss: 5.58 | Entropy loss: -0.0111  | Total Loss: 5.28 | Total Steps: 106
--- target colour: red, target object: spher

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10576/200000  | Episode Reward: -2.0  | Average Reward 5.29  | Actor loss: -0.66 | Critic loss: 5.65 | Entropy loss: -0.0218  | Total Loss: 4.98 | Total Steps: 178
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10577/200000  | Episode Reward: -3.0  | Average Reward 5.28  | Actor loss: -0.96 | Critic loss: 5.69 | Entropy loss: -0.0343  | Total Loss: 4.70 | Total Steps: 203
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10578/200000  | Episode Reward: 4.0  | Average Reward 5.25  | Actor loss: -0.30 | Critic loss: 3.58 | Entropy loss: -0.0106 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10601/200000  | Episode Reward: 1.0  | Average Reward 5.05  | Actor loss: -0.40 | Critic loss: 6.57 | Entropy loss: -0.0107  | Total Loss: 6.15 | Total Steps: 139
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10602/200000  | Episode Reward: -2.0  | Average Reward 5.04  | Actor loss: -0.18 | Critic loss: 6.16 | Entropy loss: -0.0102  | Total Loss: 5.96 | Total Steps: 173
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10603/200000  | Episode Reward: -5.0  | Average Rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10626/200000  | Episode Reward: 4.0  | Average Reward 4.79  | Actor loss: -0.22 | Critic loss: 3.41 | Entropy loss: -0.0220  | Total Loss: 3.17 | Total Steps: 147
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10627/200000  | Episode Reward: 1.0  | Average Reward 4.75  | Actor loss: -0.50 | Critic loss: 5.24 | Entropy loss: -0.0137  | Total Loss: 4.73 | Total Steps: 167
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10628/200000  | Episode Reward: -3.0  | Average Reward 4.68  | Actor loss: -0.60 | Critic loss: 4.94 | Entropy loss: -0.0281  | To

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10651/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: 0.03 | Critic loss: 3.58 | Entropy loss: -0.0089  | Total Loss: 3.60 | Total Steps: 104
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10652/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: -0.28 | Critic loss: 4.72 | Entropy loss: -0.0136  | Total Loss: 4.43 | Total Steps: 105
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10653/200000  | Episode Reward: 4.0  | Average Reward 4.50  | Actor loss: -0.13 | Critic loss: 6.09 | Entropy loss: -0.0092  | Total Loss: 5.95 | Total Steps: 110
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10677/200000  | Episode Reward: 10.0  | Average Reward 4.70  | Actor loss: 0.56 | Critic loss: 3.20 | Entropy loss: -0.0014  | Total Loss: 3.76 | Total Steps: 18
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10678/200000  | Episode Reward: -5.0  | Average Reward 4.65  | Actor loss: -0.48 | Critic loss: 5.97 | Entropy loss: -0.0211  | Total Loss: 5.47 | Total Steps: 229
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10679/200000  | Episode Reward: 1.0  | Average Reward 4.61  | Actor loss: -0.56 | Critic loss: 5.03 | Entropy loss: -0.0154  | Total Loss: 4.45 | Total 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10701/200000  | Episode Reward: -5.0  | Average Reward 4.45  | Actor loss: -0.64 | Critic loss: 5.82 | Entropy loss: -0.0201  | Total Loss: 5.16 | Total Steps: 169
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10702/200000  | Episode Reward: 4.0  | Average Reward 4.42  | Actor loss: -0.36 | Critic loss: 3.48 | Entropy loss: -0.0222  | Total Loss: 3.10 | Total Steps: 221
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10703/200000  | Episode Reward: 10.0  | Average Reward 4.43  | Actor loss: 0.06 | Critic loss: 5.40 | Entropy loss: -0.0075  | Total Loss: 5.45 | Total Steps: 100
--- target colo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10726/200000  | Episode Reward: 10.0  | Average Reward 4.30  | Actor loss: 0.36 | Critic loss: 3.58 | Entropy loss: -0.0019  | Total Loss: 3.94 | Total Steps: 22
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10727/200000  | Episode Reward: 4.0  | Average Reward 4.29  | Actor loss: -0.60 | Critic loss: 4.67 | Entropy loss: -0.0183  | Total Loss: 4.06 | Total Steps: 172
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10728/200000  | Episode Reward: -6.0  | Average Reward 4.22  | Actor loss: -0.87 | Critic loss: 7.61 | Entropy loss: -0.0189  | Total Loss: 6.72 | Total Steps: 171
--- target colo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10751/200000  | Episode Reward: -2.0  | Average Reward 4.01  | Actor loss: -0.31 | Critic loss: 4.99 | Entropy loss: -0.0217  | Total Loss: 4.66 | Total Steps: 184
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10752/200000  | Episode Reward: -5.0  | Average Reward 3.98  | Actor loss: -0.58 | Critic loss: 7.12 | Entropy loss: -0.0149  | Total Loss: 6.52 | Total Steps: 172
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10753/200000  | Episode Reward: 7.0  | Average Reward 4.00  | Actor loss: -0.11 | Critic loss: 6.12 | 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10777/200000  | Episode Reward: 10.0  | Average Reward 4.39  | Actor loss: 0.30 | Critic loss: 3.25 | Entropy loss: -0.0012  | Total Loss: 3.55 | Total Steps: 16
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10778/200000  | Episode Reward: -17.0  | Average Reward 4.29  | Actor loss: -0.33 | Critic loss: 6.21 | Entropy loss: -0.0130  | Total Loss: 5.86 | Total Steps: 365
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10779/200000  | Episode Reward: 10.0  | Average Reward 4.32  | Actor loss: -0.02 | Critic loss: 4.24 | Entropy loss: -0.01

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10802/200000  | Episode Reward: 7.0  | Average Reward 4.49  | Actor loss: -0.67 | Critic loss: 3.82 | Entropy loss: -0.0138  | Total Loss: 3.14 | Total Steps: 135
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10803/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.53 | Critic loss: 5.70 | Entropy loss: -0.0199  | Total Loss: 5.15 | Total Steps: 168
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10804/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: 0.05 | Critic loss: 3.64 | Entropy loss: -0.0083  | Total Loss: 3.69 | Total Steps: 97
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10827/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.03 | Critic loss: 3.84 | Entropy loss: -0.0090  | Total Loss: 3.86 | Total Steps: 105
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10828/200000  | Episode Reward: 10.0  | Average Reward 4.62  | Actor loss: 0.05 | Critic loss: 3.72 | Entropy loss: -0.0114  | Total Loss: 3.75 | Total Steps: 94
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10829/200000  | Episode Reward: 1.0  | Average Reward 4.62  | Actor loss: -0.55 | Critic loss: 5.29 | Entropy loss: -0.0128  | Total Loss: 4.73 | Total Steps: 135
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10853/200000  | Episode Reward: 10.0  | Average Reward 4.76  | Actor loss: 0.67 | Critic loss: 4.13 | Entropy loss: -0.0017  | Total Loss: 4.80 | Total Steps: 17
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10854/200000  | Episode Reward: 7.0  | Average Reward 4.86  | Actor loss: -0.32 | Critic loss: 3.73 | Entropy loss: -0.0102  | Total Loss: 3.40 | Total Steps: 132
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10855/200000  | Episode Reward: -2.0  | Average Reward 4.80  | Actor loss: -0.28 | Critic loss: 7.25 | Entropy loss: -0.0221  | Total Loss: 6.95 | Total Steps: 150
--- target colour: red, target object: sphere ---
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10879/200000  | Episode Reward: 4.0  | Average Reward 4.76  | Actor loss: -0.46 | Critic loss: 5.88 | Entropy loss: -0.0132  | Total Loss: 5.41 | Total Steps: 109
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10880/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.26 | Critic loss: 3.96 | Entropy loss: -0.0077  | Total Loss: 3.69 | Total Steps: 135
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10881/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: 1.07 | Critic loss: 2.94 | Entropy loss: -0.0030  | Total Loss: 4.01 | Total Steps: 19
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10904/200000  | Episode Reward: 10.0  | Average Reward 4.97  | Actor loss: 0.97 | Critic loss: 3.90 | Entropy loss: -0.0020  | Total Loss: 4.87 | Total Steps: 19
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10905/200000  | Episode Reward: 10.0  | Average Reward 5.05  | Actor loss: 0.20 | Critic loss: 2.07 | Entropy loss: -0.0015  | Total Loss: 2.27 | Total Steps: 22
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10906/200000  | Episode Reward: 1.0  | Average Reward 5.00  | Actor loss: -0.49 | Critic loss: 6.86 | Entropy loss: -0.0147  | Total Loss: 6.35 | Total Steps: 146
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10929/200000  | Episode Reward: 10.0  | Average Reward 4.98  | Actor loss: 0.01 | Critic loss: 3.11 | Entropy loss: -0.0185  | Total Loss: 3.10 | Total Steps: 146
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10930/200000  | Episode Reward: 10.0  | Average Reward 5.04  | Actor loss: 0.57 | Critic loss: 9.20 | Entropy loss: -0.0103  | Total Loss: 9.76 | Total Steps: 67
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10931/200000  | Episode Reward: 1.0  | Average Reward 5.06  | Actor loss: -0.31 | Critic loss: 5.59 | Entropy loss: -0.0066  | Total Loss: 5.27 | Total Steps: 139
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10955/200000  | Episode Reward: 1.0  | Average Reward 5.13  | Actor loss: -0.55 | Critic loss: 4.79 | Entropy loss: -0.0247  | Total Loss: 4.21 | Total Steps: 167
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10956/200000  | Episode Reward: 4.0  | Average Reward 5.10  | Actor loss: -0.35 | Critic loss: 4.05 | Entropy loss: -0.0222  | Total Loss: 3.68 | Total Steps: 163
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10957/200000  | Episode Reward: -11.0  | Averag

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10979/200000  | Episode Reward: 1.0  | Average Reward 4.76  | Actor loss: -0.31 | Critic loss: 4.77 | Entropy loss: -0.0139  | Total Loss: 4.45 | Total Steps: 138
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10980/200000  | Episode Reward: 4.0  | Average Reward 4.73  | Actor loss: -0.30 | Critic loss: 6.19 | Entropy loss: -0.0068  | Total Loss: 5.89 | Total Steps: 95
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 10981/200000  | Episode Reward: 1.0  | Average Reward 4.78  | Actor loss: -0.54 | Critic loss: 5.83 | Entropy loss: -0.0113  | Total Loss: 5.27 | Total Steps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11004/200000  | Episode Reward: 10.0  | Average Reward 4.59  | Actor loss: 0.46 | Critic loss: 2.91 | Entropy loss: -0.0013  | Total Loss: 3.37 | Total Steps: 22
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11005/200000  | Episode Reward: 10.0  | Average Reward 4.64  | Actor loss: 0.07 | Critic loss: 5.26 | Entropy loss: -0.0002  | Total Loss: 5.33 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11006/200000  | Episode Reward: -5.0  | Average Reward 4.64  | Actor loss: -0.70 | Critic loss: 7.45 | Entropy loss: -0.0137  | Total Loss: 6.73 | Total Steps: 144
--- target colour: green, target object: sphere ---
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11029/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.54 | Critic loss: 4.79 | Entropy loss: -0.0012  | Total Loss: 5.32 | Total Steps: 17
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11030/200000  | Episode Reward: 0.0  | Average Reward 4.50  | Actor loss: -0.42 | Critic loss: 5.00 | Entropy loss: -0.0246  | Total Loss: 4.56 | Total Steps: 151
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11031/200000  | Episode Reward: 7.0  | Average Reward 4.49  | Actor loss: -0.14 | Critic loss: 3.01 | Entropy loss: -0.0101  | Total Loss: 2.86 | Total Steps: 138
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11052/200000  | Episode Reward: 10.0  | Average Reward 4.04  | Actor loss: 0.48 | Critic loss: 3.04 | Entropy loss: -0.0012  | Total Loss: 3.52 | Total Steps: 22
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11053/200000  | Episode Reward: 1.0  | Average Reward 4.00  | Actor loss: -0.23 | Critic loss: 4.56 | Entropy loss: -0.0167  | Total Loss: 4.32 | Total Steps: 144
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11054/200000  | Episode Reward: 7.0  | Average Reward 4.00  | Actor loss: -0.35 | Critic loss: 3.16 | Entropy loss: -0.0140  | Total Loss: 2.80 | Total Steps: 142
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11076/200000  | Episode Reward: 4.0  | Average Reward 3.73  | Actor loss: -0.20 | Critic loss: 3.61 | Entropy loss: -0.0091  | Total Loss: 3.40 | Total Steps: 168
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11077/200000  | Episode Reward: 10.0  | Average Reward 3.81  | Actor loss: 0.48 | Critic loss: 4.55 | Entropy loss: -0.0012  | Total Loss: 5.03 | Total Steps: 18
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11078/200000  | Episode Reward: 4.0  | Average Reward 3.79  | Actor loss: -0.01 | Critic loss: 6.63 | Entropy loss: -0.0065  | Total Loss: 6.61 | Total Steps: 99
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11101/200000  | Episode Reward: 7.0  | Average Reward 3.86  | Actor loss: -0.17 | Critic loss: 3.38 | Entropy loss: -0.0064  | Total Loss: 3.21 | Total Steps: 134
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11102/200000  | Episode Reward: 1.0  | Average Reward 3.81  | Actor loss: -0.44 | Critic loss: 4.86 | Entropy loss: -0.0156  | Total Loss: 4.41 | Total Steps: 112
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11103/200000  | Episode Reward: 1.0  | Average Reward 3.77  | Actor loss: -0.33 | Critic loss: 6.17 | Entropy loss: -0.0082  | Total Loss: 5.83 | Total Steps: 101
--- target colour

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11126/200000  | Episode Reward: 4.0  | Average Reward 3.78  | Actor loss: -0.07 | Critic loss: 5.79 | Entropy loss: -0.0117  | Total Loss: 5.71 | Total Steps: 103
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11127/200000  | Episode Reward: -2.0  | Average Reward 3.75  | Actor loss: -0.29 | Critic loss: 6.18 | Entropy loss: -0.0116  | Total Loss: 5.88 | Total Steps: 143
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11128/200000  | Episode Reward: -5.0  | Average Reward 3.69  | Actor loss: -0

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11151/200000  | Episode Reward: 1.0  | Average Reward 3.60  | Actor loss: -0.32 | Critic loss: 5.61 | Entropy loss: -0.0139  | Total Loss: 5.28 | Total Steps: 105
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11152/200000  | Episode Reward: 4.0  | Average Reward 3.58  | Actor loss: 0.10 | Critic loss: 4.11 | Entropy loss: -0.0141  | Total Loss: 4.20 | Total Steps: 114
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11153/200000  | Episode Reward: 10.0  | Average Reward 3.63  | Actor loss: 0.06 | Critic loss: 6.93 | Entropy loss: -0.0002  | Total Loss: 6.99 | Total Steps: 17
--- target colour: red, target object: sphere ---
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11177/200000  | Episode Reward: 6.0  | Average Reward 4.04  | Actor loss: -0.05 | Critic loss: 3.19 | Entropy loss: -0.0180  | Total Loss: 3.12 | Total Steps: 176
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11178/200000  | Episode Reward: 10.0  | Average Reward 4.05  | Actor loss: 0.95 | Critic loss: 3.56 | Entropy loss: -0.0022  | Total Loss: 4.51 | Total Steps: 18
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11179/200000  | Episode Reward: 4.0  | Average Reward 4.07  | Actor loss: 0.07 | Critic loss: 7.09 | Entropy loss: -0.0107  | Total Loss: 7.16 | Total Steps: 108
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11202/200000  | Episode Reward: 10.0  | Average Reward 4.07  | Actor loss: 0.03 | Critic loss: 5.90 | Entropy loss: -0.0001  | Total Loss: 5.93 | Total Steps: 17
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11203/200000  | Episode Reward: 10.0  | Average Reward 4.09  | Actor loss: 0.67 | Critic loss: 4.65 | Entropy loss: -0.0037  | Total Loss: 5.31 | Total Steps: 22
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11204/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: -0.45 | Critic loss: 4.59 | Entropy loss: -0.0252  | Total Loss: 4.11 | Total Steps: 197
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11205/200000  | Episode Reward: 10.0  | Aver

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11227/200000  | Episode Reward: 10.0  | Average Reward 4.10  | Actor loss: 0.12 | Critic loss: 3.81 | Entropy loss: -0.0003  | Total Loss: 3.93 | Total Steps: 21
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11228/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: -0.02 | Critic loss: 3.19 | Entropy loss: -0.0222  | Total Loss: 3.14 | Total Steps: 163
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11229/200000  | Episode Reward: 10.0  | Average Reward 4.08  | Actor loss: 1.86 | Critic loss: 11.26 | Entropy loss: -0.0088  | Total Loss: 13.11 | Total Steps: 51
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11252/200000  | Episode Reward: 10.0  | Average Reward 4.39  | Actor loss: 0.80 | Critic loss: 3.16 | Entropy loss: -0.0024  | Total Loss: 3.96 | Total Steps: 21
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11253/200000  | Episode Reward: 4.0  | Average Reward 4.41  | Actor loss: -0.38 | Critic loss: 5.70 | Entropy loss: -0.0143  | Total Loss: 5.31 | Total Steps: 134
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11254/200000  | Episode Reward: 10.0  | Average Reward 4.42  | Actor loss: 0.09 | Critic loss: 1.95 | Entropy loss: -0.0005  | Total Loss: 2.05 | Total Steps: 21
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11277/200000  | Episode Reward: 4.0  | Average Reward 4.45  | Actor loss: -0.09 | Critic loss: 3.61 | Entropy loss: -0.0095  | Total Loss: 3.51 | Total Steps: 137
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11278/200000  | Episode Reward: 10.0  | Average Reward 4.48  | Actor loss: -0.04 | Critic loss: 2.95 | Entropy loss: -0.0139  | Total Loss: 2.89 | Total Steps: 141
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11279/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: -0.04 | Critic loss: 5.16 | Entropy loss: -0.0071  | Total Loss: 5.11 | Total Steps: 103
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11303/200000  | Episode Reward: 10.0  | Average Reward 4.78  | Actor loss: -0.08 | Critic loss: 2.97 | Entropy loss: -0.0077  | Total Loss: 2.88 | Total Steps: 123
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11304/200000  | Episode Reward: 3.0  | Average Reward 4.79  | Actor loss: -0.31 | Critic loss: 4.88 | Entropy loss: -0.0159  | Total Loss: 4.56 | Total Steps: 109
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11305/200000  | Episode Reward: 6.0  | Average Reward 4.78  | Actor loss: -0.13 | Critic loss: 3.39 | Entropy loss: -0.0143  | Total Loss: 3.24 | Total Steps: 189
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11327/200000  | Episode Reward: 4.0  | Average Reward 4.64  | Actor loss: -0.23 | Critic loss: 5.55 | Entropy loss: -0.0117  | Total Loss: 5.31 | Total Steps: 111
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11328/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: 0.04 | Critic loss: 6.26 | Entropy loss: -0.0001  | Total Loss: 6.30 | Total Steps: 17
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11329/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: 0.42 | Critic loss: 4.25 | Entropy loss: -0.0011  | Total Loss: 4.67 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11330/200000  | Episode Reward: 10.0  | Average

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11353/200000  | Episode Reward: 6.0  | Average Reward 4.87  | Actor loss: -0.79 | Critic loss: 4.56 | Entropy loss: -0.0310  | Total Loss: 3.74 | Total Steps: 155
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11354/200000  | Episode Reward: 10.0  | Average Reward 4.87  | Actor loss: 0.36 | Critic loss: 3.56 | Entropy loss: -0.0012  | Total Loss: 3.92 | Total Steps: 16
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11355/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.24 | Critic loss: 6.89 | Entropy loss: -0.0072  | Total Loss: 6.64 | Total Steps: 103
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11378/200000  | Episode Reward: -2.0  | Average Reward 4.57  | Actor loss: -0.14 | Critic loss: 6.72 | Entropy loss: -0.0099  | Total Loss: 6.56 | Total Steps: 152
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11379/200000  | Episode Reward: 7.0  | Average Reward 4.58  | Actor loss: -0.29 | Critic loss: 3.43 | Entropy loss: -0.0252  | Total Loss: 3.12 | Total Steps: 120
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11380/200000  | Episode Reward: 1.0  | Average Reward 4.58  | Actor loss: 0.47 | Critic loss: 2.64 | Entropy loss: -0.0052  | Total Loss: 3

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11403/200000  | Episode Reward: 7.0  | Average Reward 4.71  | Actor loss: -0.62 | Critic loss: 7.70 | Entropy loss: -0.0070  | Total Loss: 7.07 | Total Steps: 72
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11404/200000  | Episode Reward: 6.0  | Average Reward 4.70  | Actor loss: -0.34 | Critic loss: 3.49 | Entropy loss: -0.0216  | Total Loss: 3.13 | Total Steps: 156
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11405/200000  | Episode Reward: 1.0  | Average Reward 4.66  | Actor loss: -0.25 | Critic loss: 4.30 | Entropy loss: -0.0138  | Total Loss: 4.04 | Total Steps: 194
--- target colour: yellow, target object: sphere ---
Decision Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11428/200000  | Episode Reward: 4.0  | Average Reward 4.77  | Actor loss: -0.20 | Critic loss: 5.08 | Entropy loss: -0.0117  | Total Loss: 4.86 | Total Steps: 137
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11429/200000  | Episode Reward: 4.0  | Average Reward 4.74  | Actor loss: -0.33 | Critic loss: 6.30 | Entropy loss: -0.0083  | Total Loss: 5.96 | Total Steps: 111
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11430/200000  | Episode Reward: -2.0  | Average Reward 4.71  | Actor loss: -0.33 | Critic loss: 7.38 | Entropy loss: -0.0120  | Total Loss: 7.04 | Total Steps: 182
--- target colour:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11453/200000  | Episode Reward: 10.0  | Average Reward 4.72  | Actor loss: -0.05 | Critic loss: 2.78 | Entropy loss: -0.0078  | Total Loss: 2.72 | Total Steps: 143
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11454/200000  | Episode Reward: -5.0  | Average Reward 4.64  | Actor loss: -0.11 | Critic loss: 7.61 | Entropy loss: -0.0132  | Total Loss: 7.49 | Total Steps: 146
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11455/200000  | Episode Reward: -9.0  | Average Reward 4.58  | Acto

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11478/200000  | Episode Reward: 4.0  | Average Reward 4.77  | Actor loss: -0.45 | Critic loss: 3.79 | Entropy loss: -0.0115  | Total Loss: 3.33 | Total Steps: 141
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11479/200000  | Episode Reward: -2.0  | Average Reward 4.71  | Actor loss: -0.20 | Critic loss: 4.66 | Entropy loss: -0.0192  | Total Loss: 4.44 | Total Steps: 197
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11480/200000  | Episode Reward: -5.0  | Average Reward 4.68  | Actor loss: -0.43

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11503/200000  | Episode Reward: 9.0  | Average Reward 4.48  | Actor loss: 0.06 | Critic loss: 2.09 | Entropy loss: -0.0215  | Total Loss: 2.12 | Total Steps: 138
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11504/200000  | Episode Reward: 10.0  | Average Reward 4.51  | Actor loss: 0.62 | Critic loss: 5.71 | Entropy loss: -0.0010  | Total Loss: 6.33 | Total Steps: 18
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11505/200000  | Episode Reward: 4.0  | Average Reward 4.50  | Actor loss: -0.79 | Critic loss: 4.01 | Entropy loss: -0.0185  | Total Loss: 3.20 | Total Steps: 139
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11506/200000  | Episode R

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11528/200000  | Episode Reward: 1.0  | Average Reward 4.57  | Actor loss: -0.16 | Critic loss: 4.61 | Entropy loss: -0.0110  | Total Loss: 4.44 | Total Steps: 135
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11529/200000  | Episode Reward: 7.0  | Average Reward 4.55  | Actor loss: 0.12 | Critic loss: 7.61 | Entropy loss: -0.0064  | Total Loss: 7.72 | Total Steps: 76
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11530/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.52 | Critic loss: 5.73 | Entropy loss: -0.0198  | Total Loss: 5.18 | Total Step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11553/200000  | Episode Reward: 4.0  | Average Reward 4.33  | Actor loss: -0.22 | Critic loss: 4.34 | Entropy loss: -0.0120  | Total Loss: 4.11 | Total Steps: 106
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11554/200000  | Episode Reward: 10.0  | Average Reward 4.33  | Actor loss: 0.74 | Critic loss: 10.17 | Entropy loss: -0.0062  | Total Loss: 10.91 | Total Steps: 53
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11555/200000  | Episode Reward: 10.0  | Average Reward 4.37  | Actor loss: -0.06 | Critic loss: 2.74 | Entropy loss: -0.0159  | Total Loss: 2.66 | Total Steps: 108
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11579/200000  | Episode Reward: 10.0  | Average Reward 4.61  | Actor loss: 0.43 | Critic loss: 4.70 | Entropy loss: -0.0011  | Total Loss: 5.14 | Total Steps: 16
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11580/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.18 | Critic loss: 4.75 | Entropy loss: -0.0114  | Total Loss: 4.56 | Total Steps: 104
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11581/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.04 | Critic loss: 6.59 | Entropy loss: -0.0095  | Total Loss: 6.55 | Total Steps: 107
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11604/200000  | Episode Reward: 4.0  | Average Reward 4.55  | Actor loss: -0.07 | Critic loss: 4.78 | Entropy loss: -0.0161  | Total Loss: 4.69 | Total Steps: 111
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11605/200000  | Episode Reward: 1.0  | Average Reward 4.55  | Actor loss: 0.45 | Critic loss: 3.02 | Entropy loss: -0.0018  | Total Loss: 3.47 | Total Steps: 274
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11606/200000  | Episode Reward: 4.0  | Average Reward 4.58  | Actor loss: -0.41 | Critic loss: 3.39 | Entropy loss: -0.0209  | Total Loss: 2.96 | Total Steps: 168
--- targe

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11629/200000  | Episode Reward: 7.0  | Average Reward 4.47  | Actor loss: -0.56 | Critic loss: 3.47 | Entropy loss: -0.0102  | Total Loss: 2.89 | Total Steps: 140
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11630/200000  | Episode Reward: 10.0  | Average Reward 4.54  | Actor loss: -0.15 | Critic loss: 2.55 | Entropy loss: -0.0041  | Total Loss: 2.39 | Total Steps: 105
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11631/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.10 | Critic loss: 3.26 | Entropy loss: -0.0004  | Total Loss: 3.36 | Total Steps: 21
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11632/200000  | Episode Reward: 10.0  | A

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11655/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.62 | Critic loss: 6.90 | Entropy loss: -0.0080  | Total Loss: 7.51 | Total Steps: 69
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11656/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.86 | Critic loss: 4.24 | Entropy loss: -0.0016  | Total Loss: 5.10 | Total Steps: 20
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11657/200000  | Episode Reward: 4.0  | Average Reward 4.84  | Actor loss: -0.17 | Critic loss: 5.59 | Entropy loss: -0.0102  | Total Loss: 5.40 | Total Steps: 107
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11681/200000  | Episode Reward: -5.0  | Average Reward 4.97  | Actor loss: -0.77 | Critic loss: 8.45 | Entropy loss: -0.0168  | Total Loss: 7.67 | Total Steps: 173
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11682/200000  | Episode Reward: 10.0  | Average Reward 4.99  | Actor loss: 0.41 | Critic loss: 4.42 | Entropy loss: -0.0012  | Total Loss: 4.83 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11683/200000  | Episode Reward: 4.0  | Average Reward 4.96  | Actor loss: -0.47 | Critic loss: 4.19 | Entropy loss: -0.0109  | Total Loss: 3.71 | Total Steps: 135
--- target colour

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11706/200000  | Episode Reward: 4.0  | Average Reward 5.00  | Actor loss: -0.36 | Critic loss: 5.37 | Entropy loss: -0.0093  | Total Loss: 5.00 | Total Steps: 96
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11707/200000  | Episode Reward: 1.0  | Average Reward 4.98  | Actor loss: -0.63 | Critic loss: 6.13 | Entropy loss: -0.0121  | Total Loss: 5.49 | Total Steps: 137
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11708/200000  | Episode Reward: 4.0  | Average Reward 5.00  | Actor loss: -0.40 | Critic loss: 5.00 | Entropy loss: -0.0179  | Total Loss: 4.58 | Total Steps: 163
--- target colour: blue, target object: capsule ---
Decision Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11730/200000  | Episode Reward: -2.0  | Average Reward 4.92  | Actor loss: -0.74 | Critic loss: 5.67 | Entropy loss: -0.0268  | Total Loss: 4.90 | Total Steps: 212
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11731/200000  | Episode Reward: 10.0  | Average Reward 4.96  | Actor loss: 0.09 | Critic loss: 3.54 | Entropy loss: -0.0130  | Total Loss: 3.62 | Total Steps: 117
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11732/200000  | Episode Reward: 4.0  | Average Reward 4.97  | Actor loss: -0.26 | Critic loss: 3.86 | Entropy loss: -0.0108  | Total Loss: 3.59 | Total Steps: 167
--- target colour: black, target object: capsule ---
Decision Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11755/200000  | Episode Reward: 1.0  | Average Reward 4.84  | Actor loss: -0.40 | Critic loss: 5.38 | Entropy loss: -0.0173  | Total Loss: 4.96 | Total Steps: 182
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11756/200000  | Episode Reward: 1.0  | Average Reward 4.83  | Actor loss: -0.59 | Critic loss: 4.44 | Entropy loss: -0.0192  | Total Loss: 3.83 | Total Steps: 171
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11757/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.15 | Critic loss: 5.59 | Entropy loss: -0.0058  | Total Loss: 5.43 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11781/200000  | Episode Reward: 10.0  | Average Reward 4.91  | Actor loss: 0.50 | Critic loss: 3.76 | Entropy loss: -0.0016  | Total Loss: 4.26 | Total Steps: 17
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11782/200000  | Episode Reward: 10.0  | Average Reward 4.94  | Actor loss: 0.73 | Critic loss: 3.78 | Entropy loss: -0.0016  | Total Loss: 4.51 | Total Steps: 21
--- target colour: black, target object: capsule ---
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11783/200000  | Episode Reward: 7.0  | Average Reward 5.04  | Actor loss: -0.43 | Critic loss: 4.52 | Entropy loss: -0.0023  | Total Loss: 4.08 | Total Steps: 301
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11784/200000  | Episode Reward: 9.0 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11807/200000  | Episode Reward: 1.0  | Average Reward 4.97  | Actor loss: 0.61 | Critic loss: 4.81 | Entropy loss: -0.0043  | Total Loss: 5.41 | Total Steps: 288
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11808/200000  | Episode Reward: 7.0  | Average Reward 4.99  | Actor loss: -0.50 | Critic loss: 4.21 | Entropy loss: -0.0296  | Total Loss: 3.68 | Total Steps: 238
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11809/200000  | Episode Reward: 1.0  | Average Reward 5.01  | Actor loss: -0.35 | Critic loss: 4.78 | Entropy loss: -0.0145  | Total Loss: 4.43 | Total Steps: 116
--- target 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11832/200000  | Episode Reward: -2.0  | Average Reward 5.06  | Actor loss: -0.87 | Critic loss: 7.62 | Entropy loss: -0.0182  | Total Loss: 6.73 | Total Steps: 165
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11833/200000  | Episode Reward: 10.0  | Average Reward 5.06  | Actor loss: 0.04 | Critic loss: 2.93 | Entropy loss: -0.0190  | Total Loss: 2.96 | Total Steps: 153
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11834/200000  | Episode Reward: 4.0  | Average Reward 5.05  | Actor loss: -0.22 | Critic loss: 5.66 | Entropy loss: -0.0045  | Total Loss: 5.43 | Total Steps: 163
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | E

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11857/200000  | Episode Reward: 1.0  | Average Reward 4.87  | Actor loss: -0.59 | Critic loss: 5.15 | Entropy loss: -0.0187  | Total Loss: 4.54 | Total Steps: 178
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11858/200000  | Episode Reward: 10.0  | Average Reward 4.92  | Actor loss: -0.02 | Critic loss: 2.93 | Entropy loss: -0.0152  | Total Loss: 2.90 | Total Steps: 122
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11859/200000  | Episode Reward: 7.0  | Average Reward 4.90  | Actor loss: -0.36 | Critic loss: 3.05 | Entropy loss: -0.0197  | Total Loss: 2.67 | Total Steps: 150
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11882/200000  | Episode Reward: 4.0  | Average Reward 4.68  | Actor loss: -0.17 | Critic loss: 4.82 | Entropy loss: -0.0084  | Total Loss: 4.64 | Total Steps: 143
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11883/200000  | Episode Reward: 10.0  | Average Reward 4.71  | Actor loss: 0.55 | Critic loss: 4.95 | Entropy loss: -0.0018  | Total Loss: 5.49 | Total Steps: 17
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11884/200000  | Episode Reward: 4.0  | Average Reward 4.72  | Actor loss: -0.03 | Critic loss: 6.53 | Entropy loss: -0.0057  | Total Loss: 6.49 | Total Steps: 105
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11906/200000  | Episode Reward: 10.0  | Average Reward 4.07  | Actor loss: 0.45 | Critic loss: 4.94 | Entropy loss: -0.0011  | Total Loss: 5.40 | Total Steps: 16
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11907/200000  | Episode Reward: -2.0  | Average Reward 4.05  | Actor loss: 0.19 | Critic loss: 7.84 | Entropy loss: -0.0004  | Total Loss: 8.02 | Total Steps: 269
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11908/200000  | Episode Reward: 10.0  | Average Reward 4.08  | Actor loss: 0.49 | Critic loss: 3.29 | Entropy loss: -0.0014  | Total Loss: 3.78 | Total Steps: 18
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision St

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11930/200000  | Episode Reward: -4.0  | Average Reward 4.12  | Actor loss: -0.61 | Critic loss: 5.78 | Entropy loss: -0.0248  | Total Loss: 5.15 | Total Steps: 166
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11931/200000  | Episode Reward: 1.0  | Average Reward 4.08  | Actor loss: -0.32 | Critic loss: 4.97 | Entropy loss: -0.0211  | Total Loss: 4.62 | Total Steps: 239
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11932/200000  | Episode Reward: -5.0  | Average Reward 4.04  | Actor loss: -0.18 | Critic loss: 6.77 | Entropy loss: -0.0149  | Total Loss: 6.58 | Total S

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11954/200000  | Episode Reward: -3.0  | Average Reward 3.97  | Actor loss: -0.31 | Critic loss: 4.71 | Entropy loss: -0.0206  | Total Loss: 4.38 | Total Steps: 156
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11955/200000  | Episode Reward: 4.0  | Average Reward 3.98  | Actor loss: 0.55 | Critic loss: 5.98 | Entropy loss: -0.0017  | Total Loss: 6.53 | Total Steps: 277
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11956/200000  | Episode Reward: 1.0  | Average Reward 3.98  | Actor loss: -0.37 | Critic loss: 5.84 | Entropy loss: -0.0114  | Total Loss: 5.46 | Total Steps: 105
--- ta

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11979/200000  | Episode Reward: 10.0  | Average Reward 3.80  | Actor loss: 0.12 | Critic loss: 4.04 | Entropy loss: -0.0164  | Total Loss: 4.14 | Total Steps: 114
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11980/200000  | Episode Reward: 10.0  | Average Reward 3.85  | Actor loss: 0.40 | Critic loss: 3.44 | Entropy loss: -0.0011  | Total Loss: 3.84 | Total Steps: 17
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 11981/200000  | Episode Reward: 1.0  | Average Reward 3.80  | Actor loss: -0.39 | Critic loss: 4.34 | Entropy loss: -0.0132  | Total Loss: 3.94 | Total Steps: 139
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12004/200000  | Episode Reward: -5.0  | Average Reward 3.77  | Actor loss: -0.65 | Critic loss: 7.48 | Entropy loss: -0.0109  | Total Loss: 6.81 | Total Steps: 138
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12005/200000  | Episode Reward: -2.0  | Average Reward 3.71  | Actor loss: -0.38 | Critic loss: 4.57 | Entropy loss: -0.0252  | Total Loss: 4.17 | Total Steps: 193
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12006/200000  | Episode Reward: 7.0  | Average Reward 3.69  | Actor loss: -0.14 | Critic loss: 2.58 | Entropy loss: -0.0114  | Total Loss: 2.44 | Total Steps: 143
--- target colour: black, target object: cube ---
Agent in ter

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12029/200000  | Episode Reward: 1.0  | Average Reward 3.58  | Actor loss: -0.39 | Critic loss: 6.09 | Entropy loss: -0.0166  | Total Loss: 5.68 | Total Steps: 139
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12030/200000  | Episode Reward: -2.0  | Average Reward 3.52  | Actor loss: -0.59 | Critic loss: 6.93 | Entropy loss: -0.0153  | Total Loss: 6.32 | Total Steps: 179
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12031/200000  | Episode Reward: 1.0  | Average Reward 3.52  | Actor loss: -0.22 | Critic loss: 5.49 | En

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12053/200000  | Episode Reward: -3.0  | Average Reward 3.53  | Actor loss: -0.61 | Critic loss: 4.59 | Entropy loss: -0.0233  | Total Loss: 3.96 | Total Steps: 161
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12054/200000  | Episode Reward: 7.0  | Average Reward 3.52  | Actor loss: 0.21 | Critic loss: 3.91 | Entropy loss: -0.0107  | Total Loss: 4.11 | Total Steps: 141
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12055/200000  | Episode Reward: 4.0  | Average Reward 3.53  | Actor loss: -0.00 | Critic loss: 3.28 | Entropy loss: -0.0204  | Total Loss: 3.26 | Total Steps: 174
--- target colou

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12078/200000  | Episode Reward: 10.0  | Average Reward 3.56  | Actor loss: 0.51 | Critic loss: 3.81 | Entropy loss: -0.0017  | Total Loss: 4.32 | Total Steps: 17
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12079/200000  | Episode Reward: -2.0  | Average Reward 3.56  | Actor loss: -0.42 | Critic loss: 6.64 | Entropy loss: -0.0134  | Total Loss: 6.21 | Total Steps: 176
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12080/200000  | Episode Reward: 10.0  | Average Reward 3.58  | Actor loss: 0.85 | Critic loss: 6.93 | Entropy loss: -0.0011  | Total Loss: 7.78 | Total Steps: 18
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12104/200000  | Episode Reward: 4.0  | Average Reward 4.24  | Actor loss: -0.18 | Critic loss: 4.40 | Entropy loss: -0.0106  | Total Loss: 4.21 | Total Steps: 110
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12105/200000  | Episode Reward: 4.0  | Average Reward 4.21  | Actor loss: -0.43 | Critic loss: 5.02 | Entropy loss: -0.0201  | Total Loss: 4.57 | Total Steps: 187
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12106/200000  | Episode Reward: -3.0  | Average Reward 4.14  | Actor loss: -0.74 | Critic loss: 5.90 | Entropy loss: -0.0156  | Total Loss: 5.14 | Total Steps

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12130/200000  | Episode Reward: 1.0  | Average Reward 4.45  | Actor loss: -0.15 | Critic loss: 4.73 | Entropy loss: -0.0116  | Total Loss: 4.57 | Total Steps: 124
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12131/200000  | Episode Reward: 10.0  | Average Reward 4.49  | Actor loss: -0.30 | Critic loss: 2.32 | Entropy loss: -0.0236  | Total Loss: 2.00 | Total Steps: 147
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12132/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: 0.02 | Critic loss: 4.11 | Entropy loss: -0.0137  | Total Loss: 4.12 | Total Steps: 149
--- target colour: black, target object: capsule ---
Decision Step 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12156/200000  | Episode Reward: 1.0  | Average Reward 4.73  | Actor loss: -0.57 | Critic loss: 6.50 | Entropy loss: -0.0067  | Total Loss: 5.92 | Total Steps: 100
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12157/200000  | Episode Reward: 4.0  | Average Reward 4.71  | Actor loss: -0.05 | Critic loss: 4.40 | Entropy loss: -0.0148  | Total Loss: 4.33 | Total Steps: 113
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12180/200000  | Episode Reward: 10.0  | Average Reward 4.50  | Actor loss: 0.09 | Critic loss: 5.19 | Entropy loss: -0.0095  | Total Loss: 5.27 | Total Steps: 104
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12181/200000  | Episode Reward: 7.0  | Average Reward 4.53  | Actor loss: -0.27 | Critic loss: 3.56 | Entropy loss: -0.0146  | Total Loss: 3.27 | Total Steps: 132
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12182/200000  | Episode Reward: -2.0  | Average Reward 4.50  | Actor loss: -0.55 | Critic loss: 4.83 | Entropy loss: -0.0175  | Total Loss: 4.26 | Total Steps: 179
--- target colour: blue, target object: prism ---
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12205/200000  | Episode Reward: 4.0  | Average Reward 4.44  | Actor loss: -0.12 | Critic loss: 6.87 | Entropy loss: -0.0079  | Total Loss: 6.74 | Total Steps: 101
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12206/200000  | Episode Reward: 7.0  | Average Reward 4.44  | Actor loss: -0.50 | Critic loss: 3.18 | Entropy loss: -0.0157  | Total Loss: 2.66 | Total Steps: 161
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12207/200000  | Episode Reward: 10.0  | Average Reward 4.44  | Actor loss: 1.90 | Critic loss: 5.44 | Entropy loss: -0.0039  | Total Loss: 7.33 | Total Steps: 19
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episo

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12231/200000  | Episode Reward: 1.0  | Average Reward 4.58  | Actor loss: -0.45 | Critic loss: 5.49 | Entropy loss: -0.0118  | Total Loss: 5.02 | Total Steps: 167
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12232/200000  | Episode Reward: 4.0  | Average Reward 4.55  | Actor loss: -0.45 | Critic loss: 5.53 | Entropy loss: -0.0213  | Total Loss: 5.06 | Total Steps: 177
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12233/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 0.81 | Critic loss: 5.13 | Entropy loss: -0.0022  | Total Loss: 5.94 | Total Steps: 18
--- target colour: black, target object: cylinder ---
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12257/200000  | Episode Reward: 10.0  | Average Reward 4.98  | Actor loss: -0.39 | Critic loss: 4.05 | Entropy loss: -0.0078  | Total Loss: 3.64 | Total Steps: 98
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12258/200000  | Episode Reward: 10.0  | Average Reward 5.07  | Actor loss: 0.73 | Critic loss: 2.07 | Entropy loss: -0.0023  | Total Loss: 2.80 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12259/200000  | Episode Reward: 1.0  | Average Reward 5.08  | Actor loss: -0.27 | Critic loss: 6.06 | Entropy loss: -0.0055  | Total Loss: 5.79 | Total Steps: 139
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12281/200000  | Episode Reward: 3.0  | Average Reward 4.79  | Actor loss: -0.39 | Critic loss: 5.41 | Entropy loss: -0.0124  | Total Loss: 5.00 | Total Steps: 161
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12282/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 0.33 | Critic loss: 2.96 | Entropy loss: -0.0015  | Total Loss: 3.29 | Total Steps: 17
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12283/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 0.42 | Critic loss: 6.49 | Entropy loss: -0.0130  | Total Loss: 6.90 | Total Steps: 90
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12307/200000  | Episode Reward: 0.0  | Average Reward 5.01  | Actor loss: -0.59 | Critic loss: 4.25 | Entropy loss: -0.0233  | Total Loss: 3.64 | Total Steps: 151
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12308/200000  | Episode Reward: 1.0  | Average Reward 4.97  | Actor loss: -0.44 | Critic loss: 6.78 | Entropy loss: -0.0122  | Total Loss: 6.33 | Total Steps: 174
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12309/200000  | Episode Reward: 10.0  | Average Reward 5.01  | Actor loss: -0.25 | Critic loss: 3.86 | Entropy loss: -0.0127  | Total Loss: 3.59 | Total Steps: 100
--- target colour: red, target object: prism ---
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12331/200000  | Episode Reward: 10.0  | Average Reward 4.68  | Actor loss: 0.17 | Critic loss: 3.33 | Entropy loss: -0.0172  | Total Loss: 3.48 | Total Steps: 103
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12332/200000  | Episode Reward: 4.0  | Average Reward 4.68  | Actor loss: -0.12 | Critic loss: 3.83 | Entropy loss: -0.0143  | Total Loss: 3.69 | Total Steps: 158
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12333/200000  | Episode Reward: 4.0  | Average Reward 4.71  | Actor loss: -0.48 | Critic loss: 3.79 | Entropy loss: -0.0178  | Total Loss: 3.29 | Total Steps: 161
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12357/200000  | Episode Reward: 4.0  | Average Reward 4.80  | Actor loss: -0.21 | Critic loss: 3.48 | Entropy loss: -0.0266  | Total Loss: 3.24 | Total Steps: 178
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12358/200000  | Episode Reward: 2.0  | Average Reward 4.95  | Actor loss: -0.69 | Critic loss: 5.01 | Entropy loss: -0.0257  | Total Loss: 4.30 | Total Steps: 143
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12359/200000  | Episode Reward: -2.0  | Average Reward 4.93  | Actor loss: -0.27 | Critic loss: 5.90 | Entropy loss: -0.0195  | Total Loss: 5.61 | Total Steps:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12382/200000  | Episode Reward: 1.0  | Average Reward 5.07  | Actor loss: -0.54 | Critic loss: 5.22 | Entropy loss: -0.0119  | Total Loss: 4.66 | Total Steps: 157
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12383/200000  | Episode Reward: 1.0  | Average Reward 5.05  | Actor loss: -0.33 | Critic loss: 6.55 | Entropy loss: -0.0057  | Total Loss: 6.22 | Total Steps: 157
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12384/200000  | Episode Reward: -3.0  | Average Reward 4.99  | Actor loss: 0.93 | Critic loss: 8.33 | Entropy loss: -

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12408/200000  | Episode Reward: -8.0  | Average Reward 5.05  | Actor loss: -0.07 | Critic loss: 2.72 | Entropy loss: -0.0032  | Total Loss: 2.65 | Total Steps: 302
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12409/200000  | Episode Reward: 10.0  | Average Reward 5.08  | Actor loss: 0.14 | Critic loss: 3.47 | Entropy loss: -0.0161  | Total Loss: 3.60 | Total Steps: 105
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12410/200000  | Episode Reward: 8.0  | Average Reward 5.08  | Actor loss: -0.46 | Critic loss: 2.62 | Entropy loss: -0.0220  | Total Loss: 2.15 | Total Steps: 145
--- target colour: blue, targ

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12433/200000  | Episode Reward: 10.0  | Average Reward 5.11  | Actor loss: 0.20 | Critic loss: 2.41 | Entropy loss: -0.0018  | Total Loss: 2.61 | Total Steps: 22
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12434/200000  | Episode Reward: 10.0  | Average Reward 5.11  | Actor loss: -0.17 | Critic loss: 1.60 | Entropy loss: -0.0238  | Total Loss: 1.40 | Total Steps: 182
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12435/200000  | Episode Reward: 10.0  | Average Reward 5.12  | Actor loss: 0.38 | Critic loss: 2.76 | Entropy loss: -0.0015  | Total Loss: 3.14 | Total Steps: 17
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12436/200000  | Episode Reward: 10.0  | Average Reward 5.12  | Acto

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12459/200000  | Episode Reward: -2.0  | Average Reward 4.96  | Actor loss: -0.49 | Critic loss: 5.53 | Entropy loss: -0.0176  | Total Loss: 5.02 | Total Steps: 178
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12460/200000  | Episode Reward: 4.0  | Average Reward 4.96  | Actor loss: -0.35 | Critic loss: 4.29 | Entropy loss: -0.0108  | Total Loss: 3.93 | Total Steps: 137
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12461/200000  | Episode Reward: 7.0  | Average Reward 5.08  | Actor loss: -0.35 | Critic loss: 4.35 | Entropy loss: -0.0146  | Total Loss: 3.99 | Total Steps: 155
--- target co

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12484/200000  | Episode Reward: 4.0  | Average Reward 5.19  | Actor loss: -0.34 | Critic loss: 4.72 | Entropy loss: -0.0118  | Total Loss: 4.37 | Total Steps: 144
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12485/200000  | Episode Reward: 1.0  | Average Reward 5.16  | Actor loss: -0.55 | Critic loss: 6.34 | Entropy loss: -0.0142  | Total Loss: 5.78 | Total Steps: 226
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12486/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: 0.91 | Critic loss: 4.50 | Entropy loss: -0.0021  | Total Loss: 5.41 | Total Steps: 18
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12509/200000  | Episode Reward: 4.0  | Average Reward 5.01  | Actor loss: -0.16 | Critic loss: 4.04 | Entropy loss: -0.0149  | Total Loss: 3.86 | Total Steps: 109
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12510/200000  | Episode Reward: -2.0  | Average Reward 5.04  | Actor loss: -0.33 | Critic loss: 4.63 | Entropy loss: -0.0141  | Total Loss: 4.29 | Total Steps: 141
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12511/200000  | Episode Reward: 7.0  | Average Reward 5.03  | Actor loss: -0.23 | Critic loss: 3.04 | Entropy loss: -0.0118  | Total Loss: 2.80 | Total Steps: 163
--- target colour:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12535/200000  | Episode Reward: 4.0  | Average Reward 5.20  | Actor loss: -0.12 | Critic loss: 4.05 | Entropy loss: -0.0245  | Total Loss: 3.90 | Total Steps: 123
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12536/200000  | Episode Reward: 10.0  | Average Reward 5.20  | Actor loss: -0.21 | Critic loss: 2.80 | Entropy loss: -0.0097  | Total Loss: 2.58 | Total Steps: 130
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12537/200000  | Episode Reward: 3.0  | Average Reward 5.16  | Actor loss: -0.39 | Critic loss: 3.29 | Entropy loss: -0.0243  | Total Loss: 2.87 | Total Steps: 127
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal S

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12560/200000  | Episode Reward: 7.0  | Average Reward 5.09  | Actor loss: -0.40 | Critic loss: 6.73 | Entropy loss: -0.0067  | Total Loss: 6.32 | Total Steps: 91
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12561/200000  | Episode Reward: 1.0  | Average Reward 5.04  | Actor loss: -0.51 | Critic loss: 5.56 | Entropy loss: -0.0144  | Total Loss: 5.03 | Total Steps: 149
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12562/200000  | Episode Reward: 6.0  | Average Reward 5.04  | Actor loss: -0.32 | Critic loss: 3.40 | Entropy loss: -0.0194  | Total Loss: 3.06 | Total Steps: 178
--- target colour: red, target object: cylinder ---
Agent in termi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12585/200000  | Episode Reward: 10.0  | Average Reward 5.11  | Actor loss: 0.24 | Critic loss: 3.17 | Entropy loss: -0.0126  | Total Loss: 3.39 | Total Steps: 110
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12586/200000  | Episode Reward: 10.0  | Average Reward 5.11  | Actor loss: 0.34 | Critic loss: 3.29 | Entropy loss: -0.0012  | Total Loss: 3.63 | Total Steps: 16
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12587/200000  | Episode Reward: -11.0  | Average Reward 5.03  | Actor loss: 0.18 | Critic loss: 2.22 | Entropy loss: -0.0012  | Total Loss: 2.40 | Total Steps: 278
--- targe

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12610/200000  | Episode Reward: 10.0  | Average Reward 4.97  | Actor loss: -0.22 | Critic loss: 1.95 | Entropy loss: -0.0152  | Total Loss: 1.72 | Total Steps: 154
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12611/200000  | Episode Reward: 4.0  | Average Reward 5.00  | Actor loss: -0.20 | Critic loss: 4.19 | Entropy loss: -0.0151  | Total Loss: 3.98 | Total Steps: 140
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12612/200000  | Episode Reward: 1.0  | Average Reward 5.02  | Actor loss: -0.32 | Critic loss: 3.78 | Entropy loss: -0.0110  | Total Loss: 3.45 | Total Steps: 149
--- target colour: blue, target object: cube ---
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12635/200000  | Episode Reward: -1.0  | Average Reward 4.76  | Actor loss: -0.81 | Critic loss: 3.62 | Entropy loss: -0.0333  | Total Loss: 2.78 | Total Steps: 159
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12636/200000  | Episode Reward: -5.0  | Average Reward 4.68  | Actor loss: -0.68 | Critic loss: 7.02 | Entropy loss: -0.0183  | Total Loss: 6.33 | Total Steps: 169
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12637/200000  | Episode Reward: 1.0  | Average Reward 4.64  | Actor loss: -0.41

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12660/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.23 | Critic loss: 2.76 | Entropy loss: -0.0090  | Total Loss: 2.98 | Total Steps: 95
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12661/200000  | Episode Reward: 10.0  | Average Reward 4.57  | Actor loss: 0.22 | Critic loss: 4.59 | Entropy loss: -0.0149  | Total Loss: 4.79 | Total Steps: 117
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12662/200000  | Episode Reward: -2.0  | Average Reward 4.57  | Actor loss: -0.62 | Critic loss: 4.89 | Entropy loss: -0.0294  | Total Loss: 4.25 | Total Steps: 186
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12686/200000  | Episode Reward: 3.0  | Average Reward 4.54  | Actor loss: -0.56 | Critic loss: 5.25 | Entropy loss: -0.0259  | Total Loss: 4.67 | Total Steps: 185
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12687/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: -0.12 | Critic loss: 2.10 | Entropy loss: -0.0123  | Total Loss: 1.98 | Total Steps: 143
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12688/200000  | Episode Reward: 4.0  | Average Reward 4.55  | Actor loss: -0.59 | Critic loss: 4.82 | Entropy loss: -0.0158  | Total Loss: 4.21 | Total Steps: 101
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12711/200000  | Episode Reward: -17.0  | Average Reward 4.35  | Actor loss: -0.32 | Critic loss: 5.38 | Entropy loss: -0.0117  | Total Loss: 5.05 | Total Steps: 416
--- target colour: black, target object: prism ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12712/200000  | Episode Reward: 7.0  | Average Reward 4.40  | Actor loss: 0.06 | Critic loss: 12.61 | Entropy loss: -0.0001  | Total Loss: 12.67 | Total Steps: 252
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12736/200000  | Episode Reward: 7.0  | Average Reward 4.33  | Actor loss: -0.32 | Critic loss: 4.37 | Entropy loss: -0.0064  | Total Loss: 4.04 | Total Steps: 127
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12737/200000  | Episode Reward: 4.0  | Average Reward 4.34  | Actor loss: 0.04 | Critic loss: 3.79 | Entropy loss: -0.0250  | Total Loss: 3.80 | Total Steps: 133
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12738/200000  | Episode Reward: -2.0  | Average Reward 4.28  | Actor loss: 0.71 | Critic loss: 3.20 | Entropy loss: -0.0055  | Total Loss: 3.90 | Total Steps: 284
--- ta

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12760/200000  | Episode Reward: 1.0  | Average Reward 4.14  | Actor loss: -0.42 | Critic loss: 2.92 | Entropy loss: -0.0187  | Total Loss: 2.48 | Total Steps: 146
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12761/200000  | Episode Reward: 10.0  | Average Reward 4.19  | Actor loss: 0.22 | Critic loss: 2.17 | Entropy loss: -0.0032  | Total Loss: 2.39 | Total Steps: 26
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12762/200000  | Episode Reward: 10.0  | Average Reward 4.21  | Actor loss: 0.48 | Critic loss: 2.42 | Entropy loss: -0.0026  | Total Loss: 2.89 | Total Steps: 22
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 12785/200000  | Episode Reward: -34.0  | Average Reward 4.11  | Actor loss: -0.79 | Critic loss: 12.73 | Entropy loss: -0.0167  | Total Loss: 11.93 | Total Steps: 500
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12786/200000  | Episode Reward: -2.0  | Average Reward 4.04  | Actor loss: -0.31 | Critic loss: 5.63 | Entropy loss: -0.0208  | Total Loss: 5.30 | Total Steps: 216
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12787/200000  | Episode Reward: 1.0  | Average Reward 4.11  | Actor loss: -0.34 | Critic loss: 5.61 | Entropy loss: -0.0124  | Total Loss: 5.25 | Tot

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12809/200000  | Episode Reward: 10.0  | Average Reward 3.89  | Actor loss: 0.12 | Critic loss: 5.42 | Entropy loss: -0.0092  | Total Loss: 5.53 | Total Steps: 97
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12810/200000  | Episode Reward: 7.0  | Average Reward 3.88  | Actor loss: 0.02 | Critic loss: 5.14 | Entropy loss: -0.0153  | Total Loss: 5.14 | Total Steps: 98
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12811/200000  | Episode Reward: 10.0  | Average Reward 3.90  | Actor loss: 0.50 | Critic loss: 9.72 | Entropy loss: -0.0018  | Total Loss: 10.22 | Total Steps: 19
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12812/200000  | Episode Reward: 10.0  | Average 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12835/200000  | Episode Reward: 3.0  | Average Reward 4.04  | Actor loss: -0.21 | Critic loss: 4.37 | Entropy loss: -0.0187  | Total Loss: 4.15 | Total Steps: 113
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12836/200000  | Episode Reward: 4.0  | Average Reward 4.08  | Actor loss: 0.02 | Critic loss: 4.74 | Entropy loss: -0.0143  | Total Loss: 4.74 | Total Steps: 104
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12837/200000  | Episode Reward: 1.0  | Average Reward 4.08  | Actor loss: -0.41 | Critic loss: 4.64 | Entropy loss: -0.0216  | Total Loss: 4.21 | Total Steps: 176
--- target colo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12859/200000  | Episode Reward: 10.0  | Average Reward 4.09  | Actor loss: 0.29 | Critic loss: 6.18 | Entropy loss: -0.0011  | Total Loss: 6.47 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12860/200000  | Episode Reward: 7.0  | Average Reward 4.08  | Actor loss: -0.27 | Critic loss: 3.14 | Entropy loss: -0.0188  | Total Loss: 2.85 | Total Steps: 170
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12861/200000  | Episode Reward: 7.0  | Average Reward 4.07  | Actor loss: -0.12 | Critic loss: 2.77 | Entropy loss: -0.0280  | Total Loss: 2.63 | Total Steps: 152
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12862/200000  | Epi

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12885/200000  | Episode Reward: 1.0  | Average Reward 4.10  | Actor loss: -0.36 | Critic loss: 5.24 | Entropy loss: -0.0222  | Total Loss: 4.86 | Total Steps: 230
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12886/200000  | Episode Reward: 10.0  | Average Reward 4.13  | Actor loss: 0.03 | Critic loss: 5.06 | Entropy loss: -0.0077  | Total Loss: 5.08 | Total Steps: 96
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12887/200000  | Episode Reward: 1.0  | Average Reward 4.09  | Actor loss: -0.85 | Critic loss: 4.48 | Entropy loss: -0.0281  | Total Loss: 3.61 | Total Steps: 154
--- target colour: green, target object: capsule ---
Agent in ter

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12911/200000  | Episode Reward: 1.0  | Average Reward 4.46  | Actor loss: -0.25 | Critic loss: 5.21 | Entropy loss: -0.0081  | Total Loss: 4.95 | Total Steps: 161
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12912/200000  | Episode Reward: 3.0  | Average Reward 4.45  | Actor loss: -0.01 | Critic loss: 5.22 | Entropy loss: -0.0168  | Total Loss: 5.20 | Total Steps: 103
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12913/200000  | Episode Reward: 7.0  | Average Reward 4.49  | Actor loss: -0.33 | Critic loss: 3.31 | Entropy loss: -0.0090  | Total Loss: 2.97 | Total Steps: 131
--- target colour: red, target object:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12937/200000  | Episode Reward: -2.0  | Average Reward 4.29  | Actor loss: -0.27 | Critic loss: 2.76 | Entropy loss: -0.0166  | Total Loss: 2.48 | Total Steps: 206
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12938/200000  | Episode Reward: -5.0  | Average Reward 4.27  | Actor loss: -0.97 | Critic loss: 5.58 | Entropy loss: -0.0312  | Total Loss: 4.57 | Total Steps: 183
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12939/200000  | Episode R

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12963/200000  | Episode Reward: 4.0  | Average Reward 4.58  | Actor loss: -0.47 | Critic loss: 4.79 | Entropy loss: -0.0122  | Total Loss: 4.31 | Total Steps: 159
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12964/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 0.51 | Critic loss: 3.05 | Entropy loss: -0.0030  | Total Loss: 3.56 | Total Steps: 22
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12965/200000  | Episode Reward: 1.0  | Average Reward 4.60  | Actor loss: -0.60 | Critic loss: 5.77 | Entropy loss: -0.0156  | Total Loss: 5.15 | Total Steps: 161
--- target colour: red, target object: cube ---
Decision Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12988/200000  | Episode Reward: -2.0  | Average Reward 4.54  | Actor loss: -0.22 | Critic loss: 5.80 | Entropy loss: -0.0107  | Total Loss: 5.57 | Total Steps: 154
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12989/200000  | Episode Reward: 10.0  | Average Reward 4.58  | Actor loss: 0.12 | Critic loss: 4.77 | Entropy loss: -0.0095  | Total Loss: 4.88 | Total Steps: 99
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 12990/200000  | E

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13012/200000  | Episode Reward: 10.0  | Average Reward 4.61  | Actor loss: 0.03 | Critic loss: 4.48 | Entropy loss: -0.0112  | Total Loss: 4.50 | Total Steps: 95
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13013/200000  | Episode Reward: 10.0  | Average Reward 4.67  | Actor loss: 0.15 | Critic loss: 3.87 | Entropy loss: -0.0126  | Total Loss: 4.00 | Total Steps: 112
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13014/200000  | Episode Reward: 1.0  | Average Reward 4.62  | Actor loss: 0.16 | Critic loss: 3.91 | Entropy loss: -0.0014  | Total Loss: 4.07 | Total Steps: 282
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Train

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13039/200000  | Episode Reward: 10.0  | Average Reward 4.88  | Actor loss: -0.29 | Critic loss: 3.81 | Entropy loss: -0.0093  | Total Loss: 3.51 | Total Steps: 106
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13040/200000  | Episode Reward: -11.0  | Average Reward 4.79  | Actor loss: 0.27 | Critic loss: 4.77 | Entropy loss: -0.0032  | Total Loss: 5.04 | Total Steps: 286
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13041/200000  | Episode Reward: -5.0  | A

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13064/200000  | Episode Reward: -8.0  | Average Reward 4.90  | Actor loss: 0.27 | Critic loss: 3.40 | Entropy loss: -0.0015  | Total Loss: 3.67 | Total Steps: 277
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13065/200000  | Episode Reward: -12.0  | Average Reward 4.79  | Actor loss: -0.19 | Critic loss: 3.83 | Entropy loss: -0.0071  | Total Loss: 3.64 | Total Steps: 311
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13066/2

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13088/200000  | Episode Reward: 10.0  | Average Reward 4.65  | Actor loss: 0.45 | Critic loss: 3.59 | Entropy loss: -0.0175  | Total Loss: 4.03 | Total Steps: 105
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13089/200000  | Episode Reward: 10.0  | Average Reward 4.65  | Actor loss: 1.17 | Critic loss: 7.88 | Entropy loss: -0.0020  | Total Loss: 9.05 | Total Steps: 18
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13090/200000  | Episode Reward: 4.0  | Average Reward 4.67  | Actor loss: -0.39 | Critic loss: 3.99 | Entropy loss: -0.0125  | Total Loss: 3.59 | Total Steps: 157
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13115/200000  | Episode Reward: 4.0  | Average Reward 4.97  | Actor loss: -0.40 | Critic loss: 4.40 | Entropy loss: -0.0161  | Total Loss: 3.99 | Total Steps: 214
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13116/200000  | Episode Reward: 10.0  | Average Reward 4.97  | Actor loss: 0.32 | Critic loss: 3.44 | Entropy loss: -0.0011  | Total Loss: 3.77 | Total Steps: 16
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13117/200000  | Episode Reward: 10.0  | Average Reward 4.97  | Actor loss: -0.39 | Critic loss: 3.69 | Entropy loss: -0.0146  | Total Loss: 3.28 | Total Steps: 106
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13141/200000  | Episode Reward: 10.0  | Average Reward 5.12  | Actor loss: 0.22 | Critic loss: 2.80 | Entropy loss: -0.0016  | Total Loss: 3.02 | Total Steps: 18
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13142/200000  | Episode Reward: 10.0  | Average Reward 5.14  | Actor loss: 0.19 | Critic loss: 5.57 | Entropy loss: -0.0135  | Total Loss: 5.74 | Total Steps: 101
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13143/200000  | Episode Reward: 4.0  | Average Reward 5.11  | Actor loss: -0.40 | Critic loss: 5.57 | Entropy loss: -0.0193  | Total Loss: 5.15 | Total Steps: 158
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Ep

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13167/200000  | Episode Reward: 7.0  | Average Reward 5.03  | Actor loss: -0.32 | Critic loss: 4.32 | Entropy loss: -0.0180  | Total Loss: 3.97 | Total Steps: 204
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13168/200000  | Episode Reward: 4.0  | Average Reward 5.20  | Actor loss: -0.30 | Critic loss: 5.55 | Entropy loss: -0.0168  | Total Loss: 5.23 | Total Steps: 121
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13169/200000  | Episode Reward: 4.0  | Average Reward 5.18  | Actor loss: -0.24 | Critic loss: 5.53 | Entropy loss: -0.0117  | Total Loss: 5.27 | Total Steps: 99
--- target colour: blue, target object: cube ---
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13193/200000  | Episode Reward: 10.0  | Average Reward 5.45  | Actor loss: 0.14 | Critic loss: 1.87 | Entropy loss: -0.0057  | Total Loss: 2.01 | Total Steps: 110
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13194/200000  | Episode Reward: -5.0  | Average Reward 5.38  | Actor loss: -0.88 | Critic loss: 8.04 | Entropy loss: -0.0154  | Total Loss: 7.15 | Total Steps: 167
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13195/200000  | Episode Reward: 4.0  | Average Reward 5.38  | Actor loss: -0.20 | Critic loss: 2.80 | Entropy loss: -0.0177  | Total Loss: 2.58 | Total Steps: 125
--- target 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13218/200000  | Episode Reward: 10.0  | Average Reward 5.48  | Actor loss: 0.44 | Critic loss: 3.20 | Entropy loss: -0.0015  | Total Loss: 3.64 | Total Steps: 270
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13219/200000  | Episode Reward: 4.0  | Average Reward 5.45  | Actor loss: -0.18 | Critic loss: 4.82 | Entropy loss: -0.0076  | Total Loss: 4.64 | Total Steps: 147
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13220/200000  | Episode Reward: 1.0  | Average Reward 5.41  | Actor loss: -0.62 | Critic loss: 5.86 | Entropy loss: -0.0190  | Total Loss: 5.22 | Total Steps: 166
--- target colour: green, target object: capsule ---
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13244/200000  | Episode Reward: -2.0  | Average Reward 5.39  | Actor loss: -0.34 | Critic loss: 5.26 | Entropy loss: -0.0163  | Total Loss: 4.91 | Total Steps: 235
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13245/200000  | Episode Reward: 7.0  | Average Reward 5.42  | Actor loss: -0.16 | Critic loss: 3.10 | Entropy loss: -0.0180  | Total Loss: 2.92 | Total Steps: 169
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13246/200000  | Episode Reward: 7.0  | Average Reward 5.41  | Actor loss: -0.19 | Critic loss: 7.12 | Entropy loss: -0.0092  | Total Loss: 6.92 | Total Steps: 91
--- target colour: black, target object: pris

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13270/200000  | Episode Reward: 1.0  | Average Reward 5.62  | Actor loss: -0.71 | Critic loss: 5.77 | Entropy loss: -0.0145  | Total Loss: 5.04 | Total Steps: 131
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13271/200000  | Episode Reward: 10.0  | Average Reward 5.63  | Actor loss: 0.27 | Critic loss: 2.77 | Entropy loss: -0.0017  | Total Loss: 3.04 | Total Steps: 18
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13272/200000  | Episode Reward: 10.0  | Average Reward 5.63  | Actor loss: 0.34 | Critic loss: 2.76 | Entropy loss: -0.0023  | Total Loss: 3.09 | Total Steps: 19
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13295/200000  | Episode Reward: 4.0  | Average Reward 5.61  | Actor loss: -0.17 | Critic loss: 4.32 | Entropy loss: -0.0147  | Total Loss: 4.13 | Total Steps: 122
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13296/200000  | Episode Reward: 1.0  | Average Reward 5.58  | Actor loss: -0.31 | Critic loss: 4.24 | Entropy loss: -0.0139  | Total Loss: 3.91 | Total Steps: 157
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13297/200000  | Episode Reward: 10.0  | Average Reward 5.58  | Actor loss: 0.18 | Critic loss: 3.75 | Entropy loss: -0.0011  | Total Loss: 3.94 | Total Steps: 19
--- target colour: red, target object: prism ---
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13321/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: 0.28 | Critic loss: 1.84 | Entropy loss: -0.0016  | Total Loss: 2.11 | Total Steps: 17
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13322/200000  | Episode Reward: 9.0  | Average Reward 5.52  | Actor loss: 0.11 | Critic loss: 3.81 | Entropy loss: -0.0195  | Total Loss: 3.89 | Total Steps: 109
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13323/200000  | Episode Reward: 1.0  | Average Reward 5.52  | Actor loss: -0.15 | Critic loss: 4.88 | Entropy loss: -0.0089  | Total Loss: 4.73 | Total Steps: 135
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13347/200000  | Episode Reward: 0.0  | Average Reward 5.56  | Actor loss: -1.00 | Critic loss: 6.70 | Entropy loss: -0.0282  | Total Loss: 5.67 | Total Steps: 198
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13348/200000  | Episode Reward: 10.0  | Average Reward 5.62  | Actor loss: 0.31 | Critic loss: 1.94 | Entropy loss: -0.0016  | Total Loss: 2.25 | Total Steps: 17
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13349/200000  | Episode Reward: 4.0  | Average Reward 5.63  | Actor loss: -0.29 | Critic loss: 5.72 | Entropy loss: -0.0267  | Total Loss: 5.41 | Total Steps: 135
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in t

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13373/200000  | Episode Reward: 4.0  | Average Reward 5.57  | Actor loss: -0.12 | Critic loss: 3.54 | Entropy loss: -0.0180  | Total Loss: 3.40 | Total Steps: 161
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13374/200000  | Episode Reward: -2.0  | Average Reward 5.53  | Actor loss: -0.42 | Critic loss: 8.01 | Entropy loss: -0.0118  | Total Loss: 7.58 | Total Steps: 175
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13375/200000  | Episode Reward: 4.0  | Average Reward 5.50  | Actor loss: -0.48 | Critic loss: 4.71 | Entropy loss: -0.0138  | Total Loss: 4.21 | T

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13399/200000  | Episode Reward: 10.0  | Average Reward 5.67  | Actor loss: 0.26 | Critic loss: 1.84 | Entropy loss: -0.0014  | Total Loss: 2.09 | Total Steps: 17
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13400/200000  | Episode Reward: 10.0  | Average Reward 5.73  | Actor loss: 0.38 | Critic loss: 2.06 | Entropy loss: -0.0016  | Total Loss: 2.44 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13401/200000  | Episode Reward: 10.0  | Average Reward 5.73  | Actor loss: 0.43 | Critic loss: 2.05 | Entropy loss: -0.0014  | Total Loss: 2.49 | Total Steps: 16
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13402/200000  | Episode Reward: 10.0  | Average Reward 5.73  | Actor loss

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13424/200000  | Episode Reward: -8.0  | Average Reward 5.47  | Actor loss: 0.27 | Critic loss: 12.92 | Entropy loss: -0.0001  | Total Loss: 13.19 | Total Steps: 251
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13425/200000  | Episode Reward: 1.0  | Average Reward 5.42  | Actor loss: -0.32 | Critic loss: 4.95 | Entropy loss: -0.0205  | Total Loss: 4.61 | Total Steps: 174
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13426/200000  | Episode Reward: 1.0  | Average Reward 5.42  | Actor lo

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13449/200000  | Episode Reward: 4.0  | Average Reward 5.34  | Actor loss: -0.06 | Critic loss: 4.78 | Entropy loss: -0.0110  | Total Loss: 4.71 | Total Steps: 102
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13450/200000  | Episode Reward: 4.0  | Average Reward 5.36  | Actor loss: -0.84 | Critic loss: 5.33 | Entropy loss: -0.0232  | Total Loss: 4.46 | Total Steps: 185
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13451/200000  | Episode Reward: 10.0  | Average Reward 5.36  | Actor loss: 0.45 | Critic loss: 1.10 | Entropy loss: -0.0047  | Total Loss: 1.55 | Total Steps: 21
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13475/200000  | Episode Reward: 1.0  | Average Reward 5.21  | Actor loss: -0.65 | Critic loss: 5.56 | Entropy loss: -0.0187  | Total Loss: 4.89 | Total Steps: 144
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13476/200000  | Episode Reward: 10.0  | Average Reward 5.25  | Actor loss: 0.33 | Critic loss: 1.44 | Entropy loss: -0.0016  | Total Loss: 1.77 | Total Steps: 22
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13477/200000  | Episode Reward: 4.0  | Average Reward 5.29  | Actor loss: 0.02 | Critic loss: 3.62 | Entropy loss: -0.0265  | Total Loss: 3.61 | Total Steps: 115
--- target colour: red, target object: cube ---
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13501/200000  | Episode Reward: 1.0  | Average Reward 5.39  | Actor loss: -0.51 | Critic loss: 6.42 | Entropy loss: -0.0129  | Total Loss: 5.89 | Total Steps: 141
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13502/200000  | Episode Reward: 1.0  | Average Reward 5.38  | Actor loss: -0.45 | Critic loss: 5.72 | Entropy loss: -0.0126  | Total Loss: 5.26 | Total Steps: 159
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13503/200000  | Episode Reward: 1.0  | Average Reward 5.33  | Actor loss: -0.23 | Critic loss: 4.27 | Entropy loss: -0.0146  | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13525/200000  | Episode Reward: 10.0  | Average Reward 5.12  | Actor loss: 0.24 | Critic loss: 2.27 | Entropy loss: -0.0011  | Total Loss: 2.50 | Total Steps: 16
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13526/200000  | Episode Reward: 10.0  | Average Reward 5.14  | Actor loss: 0.37 | Critic loss: 4.59 | Entropy loss: -0.0011  | Total Loss: 4.96 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13527/200000  | Episode Reward: 7.0  | Average Reward 5.14  | Actor loss: -0.19 | Critic loss: 2.86 | Entropy loss: -0.0151  | Total Loss: 2.65 | Total Steps: 142
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13528/200000  | Episode Reward: 10.0  | Avera

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13550/200000  | Episode Reward: 7.0  | Average Reward 4.91  | Actor loss: -0.05 | Critic loss: 4.26 | Entropy loss: -0.0076  | Total Loss: 4.20 | Total Steps: 106
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13551/200000  | Episode Reward: -8.0  | Average Reward 4.87  | Actor loss: 0.49 | Critic loss: 6.31 | Entropy loss: -0.0013  | Total Loss: 6.80 | Total Steps: 275
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13552/200000  | Episode Reward: -5.0

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13575/200000  | Episode Reward: 1.0  | Average Reward 4.83  | Actor loss: 0.12 | Critic loss: 3.52 | Entropy loss: -0.0173  | Total Loss: 3.62 | Total Steps: 130
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13576/200000  | Episode Reward: 7.0  | Average Reward 4.86  | Actor loss: -0.30 | Critic loss: 6.49 | Entropy loss: -0.0100  | Total Loss: 6.18 | Total Steps: 96
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13577/200000  | Episode Reward: 7.0  | Average Reward 4.91  | Actor loss: 0.00 | Critic loss: 4.24 | Entropy loss: -0.0066  | Total Loss: 4.23 | Total Steps: 97
--- target colour: blue, target object: prism ---
Agent in terminal step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13601/200000  | Episode Reward: -7.0  | Average Reward 4.61  | Actor loss: -0.57 | Critic loss: 8.08 | Entropy loss: -0.0349  | Total Loss: 7.48 | Total Steps: 217
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13602/200000  | Episode Reward: 6.0  | Average Reward 4.58  | Actor loss: -0.29 | Critic loss: 2.75 | Entropy loss: -0.0323  | Total Loss: 2.43 | Total Steps: 200
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13603/200000  | Episode Reward: 4.0  | Average Reward 4.57  | Actor loss: -0.37 | Critic loss: 2.40 | Entropy loss: -0.0117  | Total Loss: 2.02 | Total

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13626/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 0.07 | Critic loss: 6.44 | Entropy loss: -0.0002  | Total Loss: 6.51 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13627/200000  | Episode Reward: 10.0  | Average Reward 4.82  | Actor loss: 0.10 | Critic loss: 1.87 | Entropy loss: -0.0005  | Total Loss: 1.96 | Total Steps: 21
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13628/200000  | Episode Reward: 4.0  | Average Reward 4.83  | Actor loss: -0.33 | Critic loss: 5.37 | Entropy loss: -0.0166  | Total Loss: 5.03 | Total Steps: 121
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13651/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.04 | Critic loss: 3.67 | Entropy loss: -0.0153  | Total Loss: 3.70 | Total Steps: 109
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13652/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.82 | Critic loss: 3.02 | Entropy loss: -0.0016  | Total Loss: 3.84 | Total Steps: 19
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13653/200000  | Episode Reward: 1.0  | Average Reward 4.79  | Actor loss: -0.37 | Critic loss: 5.18 | Entropy loss: -0.0113  | Total Loss: 4.79 | Total Steps: 121
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13676/200000  | Episode Reward: 10.0  | Average Reward 4.75  | Actor loss: 0.26 | Critic loss: 2.87 | Entropy loss: -0.0011  | Total Loss: 3.12 | Total Steps: 16
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13677/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.08 | Critic loss: 7.14 | Entropy loss: -0.0121  | Total Loss: 7.05 | Total Steps: 107
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13678/200000  | Episode Reward: 1.0  | Average Reward 4.70  | Actor loss: -0.28 | Critic loss: 5.77 | Entropy loss: -0.0130  | Total Loss: 5.47 | Total Steps: 156
--- target colour: black, target object: capsule ---
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13703/200000  | Episode Reward: 1.0  | Average Reward 4.88  | Actor loss: -0.41 | Critic loss: 6.69 | Entropy loss: -0.0106  | Total Loss: 6.26 | Total Steps: 152
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13704/200000  | Episode Reward: -2.0  | Average Reward 4.86  | Actor loss: -0.55 | Critic loss: 7.70 | Entropy loss: -0.0206  | Total Loss: 7.13 | Total Steps: 187
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13705/200000  | Episode Reward: 10.0  | Average Reward 4.92  | Actor loss: -0.13 | Critic loss: 2.35 | Entropy loss: -0.0083  | Total Loss: 2.21 | Total Steps: 116
--- target colour

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13730/200000  | Episode Reward: 0.0  | Average Reward 5.10  | Actor loss: -0.76 | Critic loss: 4.64 | Entropy loss: -0.0261  | Total Loss: 3.85 | Total Steps: 129
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13731/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: 0.04 | Critic loss: 1.40 | Entropy loss: -0.0005  | Total Loss: 1.44 | Total Steps: 21
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13732/200000  | Episode Reward: 4.0  | Average Reward 5.17  | Actor loss: -0.33 | Critic loss: 5.01 | Entropy loss: -0.0124  | Total Loss: 4.67 | Total Steps: 140
--- target colour: yellow, target object: cylinder ---
Decision St

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13756/200000  | Episode Reward: 6.0  | Average Reward 5.58  | Actor loss: -0.64 | Critic loss: 4.02 | Entropy loss: -0.0385  | Total Loss: 3.34 | Total Steps: 138
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13757/200000  | Episode Reward: 1.0  | Average Reward 5.58  | Actor loss: -0.87 | Critic loss: 6.54 | Entropy loss: -0.0117  | Total Loss: 5.66 | Total Steps: 134
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13758/200000  | Episode Reward: 4.0  | Average Reward 5.55  | Actor loss: -0.25 | Critic loss: 6.64 | Entropy loss: -0.0077  | Total Loss: 6.38 | Total Steps: 112
--- target colour: blue, target object: caps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13780/200000  | Episode Reward: 9.0  | Average Reward 5.13  | Actor loss: 0.03 | Critic loss: 3.69 | Entropy loss: -0.0278  | Total Loss: 3.69 | Total Steps: 122
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13781/200000  | Episode Reward: 10.0  | Average Reward 5.13  | Actor loss: 1.31 | Critic loss: 5.56 | Entropy loss: -0.0021  | Total Loss: 6.87 | Total Steps: 19
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13782/200000  | Episode Reward: 10.0  | Average Reward 5.13  | Actor loss: 0.28 | Critic loss: 3.74 | Entropy loss: -0.0292  | Total Loss: 3.98 | Total Steps: 118
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13805/200000  | Episode Reward: -2.0  | Average Reward 5.12  | Actor loss: -0.44 | Critic loss: 6.18 | Entropy loss: -0.0118  | Total Loss: 5.73 | Total Steps: 173
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13806/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: 0.39 | Critic loss: 3.73 | Entropy loss: -0.0015  | Total Loss: 4.12 | Total Steps: 17
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13807/200000  | Episode Reward: 7.0  | Average Reward 5.24  | Actor loss: -0.10 | Critic loss: 1.23 | Entropy loss: -0.0252  | Total Loss: 1.11 | Total Steps: 171
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13831/200000  | Episode Reward: 3.0  | Average Reward 5.20  | Actor loss: -0.32 | Critic loss: 5.81 | Entropy loss: -0.0172  | Total Loss: 5.47 | Total Steps: 118
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13832/200000  | Episode Reward: 7.0  | Average Reward 5.21  | Actor loss: -0.45 | Critic loss: 4.06 | Entropy loss: -0.0236  | Total Loss: 3.58 | Total Steps: 169
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13833/200000  | Episode Reward: 1.0  | Average Reward 5.23  | Actor loss: -0.21 | Critic loss: 4.07 | Entropy loss: -0.0132  | Total Loss: 3.85 | Total Steps: 126
--- target colour: red, target object: pr

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13856/200000  | Episode Reward: -3.0  | Average Reward 5.21  | Actor loss: -0.84 | Critic loss: 7.17 | Entropy loss: -0.0211  | Total Loss: 6.31 | Total Steps: 117
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13857/200000  | Episode Reward: -2.0  | Average Reward 5.18  | Actor loss: -0.51 | Critic loss: 7.77 | Entropy loss: -0.0163  | Total Loss: 7.25 | Total Steps: 148
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in ter

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13880/200000  | Episode Reward: 7.0  | Average Reward 4.99  | Actor loss: -0.19 | Critic loss: 4.15 | Entropy loss: -0.0080  | Total Loss: 3.95 | Total Steps: 134
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13881/200000  | Episode Reward: 4.0  | Average Reward 4.96  | Actor loss: -0.06 | Critic loss: 3.64 | Entropy loss: -0.0109  | Total Loss: 3.56 | Total Steps: 120
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13882/200000  | Episode Reward: 10.0  | Average Reward 4.97  | Actor loss: 0.78 | Critic loss: 3.04 | Entropy loss: -0.0035  | Total Loss: 3.82 | Total Steps: 18
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Ep

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13906/200000  | Episode Reward: 10.0  | Average Reward 4.87  | Actor loss: 0.03 | Critic loss: 4.64 | Entropy loss: -0.0001  | Total Loss: 4.67 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13907/200000  | Episode Reward: 10.0  | Average Reward 4.89  | Actor loss: 0.80 | Critic loss: 2.39 | Entropy loss: -0.0015  | Total Loss: 3.19 | Total Steps: 19
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13908/200000  | Episode Reward: 10.0  | Average Reward 4.91  | Actor loss: 1.49 | Critic loss: 12.08 | Entropy loss: -0.0068  | Total Loss: 13.56 | Total Steps: 44
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13909/200000  | Episode Reward: 8.0  | Average Reward 4.89  | Actor 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13932/200000  | Episode Reward: -14.0  | Average Reward 4.59  | Actor loss: -0.24 | Critic loss: 7.41 | Entropy loss: -0.0143  | Total Loss: 7.15 | Total Steps: 236
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13933/200000  | Episode Reward: 10.0  | Average Reward 4.62  | Actor loss: 0.41 | Critic loss: 3.17 | Entropy loss: -0.0012  | Total Loss: 3.59 | Total Steps: 16
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13934/200000  | Episode Reward: 10.0  | Average Reward 4.67  | Actor loss: 0.71 | Critic loss: 0.82 | Entropy loss: -0.0031  | Total Loss: 1.53 | Total

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13957/200000  | Episode Reward: 4.0  | Average Reward 4.53  | Actor loss: -0.26 | Critic loss: 4.70 | Entropy loss: -0.0158  | Total Loss: 4.42 | Total Steps: 107
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13958/200000  | Episode Reward: 10.0  | Average Reward 4.55  | Actor loss: 0.04 | Critic loss: 3.18 | Entropy loss: -0.0002  | Total Loss: 3.21 | Total Steps: 17
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13959/200000  | Episode Reward: 0.0  | Average Reward 4.54  | Actor loss: -0.36 | Critic loss: 6.70 | Entropy loss: -0.0164  | Total Loss: 6.32 | Total Steps: 152
--- target colour: blue, target object: prism ---
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13982/200000  | Episode Reward: 4.0  | Average Reward 4.74  | Actor loss: -0.20 | Critic loss: 2.52 | Entropy loss: -0.0283  | Total Loss: 2.29 | Total Steps: 171
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13983/200000  | Episode Reward: 10.0  | Average Reward 4.79  | Actor loss: -0.30 | Critic loss: 1.27 | Entropy loss: -0.0121  | Total Loss: 0.96 | Total Steps: 136
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 13984/200000  | Episode Reward: 10.0  | Average Reward 4.84  | Actor loss: 1.58 | Critic loss: 11.26 | Entropy loss: -0.0106  | Total Loss: 12.82 | Total Steps: 56
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14007/200000  | Episode Reward: 1.0  | Average Reward 4.79  | Actor loss: -0.49 | Critic loss: 5.46 | Entropy loss: -0.0104  | Total Loss: 4.95 | Total Steps: 134
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14008/200000  | Episode Reward: 10.0  | Average Reward 4.83  | Actor loss: 0.36 | Critic loss: 3.22 | Entropy loss: -0.0012  | Total Loss: 3.58 | Total Steps: 16
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14009/200000  | Episode Reward: 1.0  | Average Reward 4.80  | Actor loss: -0.68 | Critic loss: 7.24 | Entropy loss: -0.0331  | Total Loss: 6.52 | Total Steps: 218
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in t

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14032/200000  | Episode Reward: 4.0  | Average Reward 4.62  | Actor loss: -0.75 | Critic loss: 3.99 | Entropy loss: -0.0143  | Total Loss: 3.23 | Total Steps: 124
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14033/200000  | Episode Reward: 9.0  | Average Reward 4.66  | Actor loss: 0.20 | Critic loss: 8.50 | Entropy loss: -0.0166  | Total Loss: 8.69 | Total Steps: 76
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14034/200000  | Episode Reward: 7.0  | Average Reward 4.64  | Actor loss: -0.15 | Critic loss: 5.23 | Entropy loss: -0.0178  | Total Loss: 5.07 | Total Steps: 149
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14056/200000  | Episode Reward: 4.0  | Average Reward 4.37  | Actor loss: 0.16 | Critic loss: 3.36 | Entropy loss: -0.0178  | Total Loss: 3.50 | Total Steps: 111
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14057/200000  | Episode Reward: 1.0  | Average Reward 4.38  | Actor loss: 0.07 | Critic loss: 7.02 | Entropy loss: -0.0067  | Total Loss: 7.09 | Total Steps: 103
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14058/200000  | Episode Reward: 1.0  | Average Reward 4.50  | Actor loss: -0.22 | Critic loss: 5.53 | Entropy loss: -0.0083  | Total Loss: 5.30 | Total St

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14081/200000  | Episode Reward: 7.0  | Average Reward 4.53  | Actor loss: 0.38 | Critic loss: 4.16 | Entropy loss: -0.0084  | Total Loss: 4.54 | Total Steps: 97
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14082/200000  | Episode Reward: 1.0  | Average Reward 4.48  | Actor loss: -0.32 | Critic loss: 4.88 | Entropy loss: -0.0119  | Total Loss: 4.55 | Total Steps: 176
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14083/200000  | Episode Reward: 4.0  | Average Reward 4.45  | Actor loss: -0.10 | Critic loss: 3.06 | Entropy loss: -0.0270  | Total Loss: 2.93 | Total Steps: 169
--- target colour: black, target object: ca

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14105/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.09 | Critic loss: 2.48 | Entropy loss: -0.0006  | Total Loss: 2.58 | Total Steps: 21
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14106/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.07 | Critic loss: 3.68 | Entropy loss: -0.0080  | Total Loss: 3.74 | Total Steps: 112
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14107/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 0.08 | Critic loss: 1.60 | Entropy loss: -0.0006  | Total Loss: 1.68 | Total Steps: 21
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14108/200000  | Episode Reward: 10.0  | Average Reward 4.17  | Actor loss: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14130/200000  | Episode Reward: 4.0  | Average Reward 4.02  | Actor loss: -0.41 | Critic loss: 4.18 | Entropy loss: -0.0120  | Total Loss: 3.76 | Total Steps: 137
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14131/200000  | Episode Reward: 7.0  | Average Reward 4.00  | Actor loss: -0.04 | Critic loss: 2.65 | Entropy loss: -0.0171  | Total Loss: 2.59 | Total Steps: 156
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14132/200000  | Episode Reward: 7.0  | Average Reward 4.11  | Actor loss: -0.00 | Critic loss: 4.80 | Entropy loss: -0.0057  | Total Loss: 4.79 | Total Steps: 94
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14156/200000  | Episode Reward: -5.0  | Average Reward 4.08  | Actor loss: -0.34 | Critic loss: 5.85 | Entropy loss: -0.0105  | Total Loss: 5.50 | Total Steps: 167
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14157/200000  | Episode Reward: 4.0  | Average Reward 4.08  | Actor loss: 0.21 | Critic loss: 2.71 | Entropy loss: -0.0153  | Total Loss: 2.91 | Total Steps: 112
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14158/200000  | Episode Reward: 10.0  | Average Reward 4.08  | Actor loss: 0.58 | Critic loss: 5.61 | Entropy loss: -0.0016  | Total Loss: 6.19 | Total Steps: 17
--- target colour: r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14182/200000  | Episode Reward: 10.0  | Average Reward 4.29  | Actor loss: 0.30 | Critic loss: 2.74 | Entropy loss: -0.0012  | Total Loss: 3.03 | Total Steps: 16
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14183/200000  | Episode Reward: -21.0  | Average Reward 4.13  | Actor loss: -0.98 | Critic loss: 8.72 | Entropy loss: -0.0308  | Total Loss: 7.70 | Total Steps: 465
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14184/200000  | Episode Reward: 10.0  | Average Reward 4.13  | Actor loss: 0.08 | Critic l

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14206/200000  | Episode Reward: -2.0  | Average Reward 4.02  | Actor loss: 0.12 | Critic loss: 6.79 | Entropy loss: -0.0003  | Total Loss: 6.91 | Total Steps: 264
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14207/200000  | Episode Reward: 10.0  | Average Reward 4.07  | Actor loss: 0.01 | Critic loss: 2.83 | Entropy loss: -0.0130  | Total Loss: 2.83 | Total Steps: 110
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14208/200000  | Episode Reward: 10.0  | Average Reward 4.07  | Actor loss: -0.16 | Critic loss: 4.36 | Entropy loss: -0.0110  | Total Loss: 4.19 | Total Steps: 99
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
T

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14231/200000  | Episode Reward: 0.0  | Average Reward 3.96  | Actor loss: 0.42 | Critic loss: 7.04 | Entropy loss: -0.0022  | Total Loss: 7.46 | Total Steps: 283
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14232/200000  | Episode Reward: 1.0  | Average Reward 3.95  | Actor loss: -0.48 | Critic loss: 4.75 | Entropy loss: -0.0112  | Total Loss: 4.26 | Total Steps: 159
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14233/200000  | Episode Reward: 10.0  | Average Reward 3.96  | Actor loss: 0.07 | Critic loss: 2.35 | Entropy loss: -0.0003  | Total Loss: 2.43 | Total Steps: 17
--- target colour: green, target ob

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14256/200000  | Episode Reward: -2.0  | Average Reward 4.19  | Actor loss: -0.40 | Critic loss: 4.72 | Entropy loss: -0.0210  | Total Loss: 4.30 | Total Steps: 207
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14257/200000  | Episode Reward: 10.0  | Average Reward 4.24  | Actor loss: 0.20 | Critic loss: 1.68 | Entropy loss: -0.0011  | Total Loss: 1.87 | Total Steps: 16
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14258/200000  | Episode Reward: 10.0  | Average Reward 4.28  | Actor loss: -0.09 | Critic loss: 4.76 | Entropy loss: -0.0180  | Total Loss: 4.65 | Total Steps: 107
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14281/200000  | Episode Reward: -14.0  | Average Reward 4.18  | Actor loss: -0.15 | Critic loss: 3.41 | Entropy loss: -0.0028  | Total Loss: 3.25 | Total Steps: 294
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14282/200000  | Episode Reward: -2.0  | Average Reward 4.17  | Actor loss: -0.39 | Critic loss: 6.77 | Entropy loss: -0.0094  | Total Loss: 6.37 | Total Steps: 163
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14283/200000  | Episode Reward: 10.0

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14306/200000  | Episode Reward: 1.0  | Average Reward 4.46  | Actor loss: -0.64 | Critic loss: 6.73 | Entropy loss: -0.0173  | Total Loss: 6.07 | Total Steps: 139
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14307/200000  | Episode Reward: 10.0  | Average Reward 4.46  | Actor loss: 0.22 | Critic loss: 1.47 | Entropy loss: -0.0011  | Total Loss: 1.69 | Total Steps: 16
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14308/200000  | Episode Reward: 4.0  | Average Reward 4.43  | Actor loss: -0.20 | Critic loss: 3.06 | Entropy loss: -0.0145  | Total Loss: 2.84 | Total Steps: 201
--- target colour: green, target object: cylinder ---
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14332/200000  | Episode Reward: 9.0  | Average Reward 4.70  | Actor loss: -0.22 | Critic loss: 4.01 | Entropy loss: -0.0192  | Total Loss: 3.77 | Total Steps: 115
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14333/200000  | Episode Reward: 4.0  | Average Reward 4.75  | Actor loss: -0.09 | Critic loss: 8.02 | Entropy loss: -0.0092  | Total Loss: 7.92 | Total Steps: 100
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14334/200000  | Episode Reward: 1.0  | Average Reward 4.72  | Actor loss: -0.48 | Critic loss: 4.04 | Entropy loss: -0.0222  | Total Loss: 3.54 | Total Steps: 187
--- target colour: blue, target object: capsule ---
Decision St

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14358/200000  | Episode Reward: -2.0  | Average Reward 4.61  | Actor loss: -0.45 | Critic loss: 6.09 | Entropy loss: -0.0153  | Total Loss: 5.62 | Total Steps: 152
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14359/200000  | Episode Reward: 1.0  | Average Reward 4.57  | Actor loss: -0.28 | Critic loss: 6.13 | Entropy loss: -0.0116  | Total Loss: 5.85 | Total Steps: 104
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14360/200000  | Episode Reward: 4.0  | Average Reward 4.55  | Actor loss: -0.45 | Critic loss: 7.04 | Entropy loss: -0.0165  | Tot

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14384/200000  | Episode Reward: 4.0  | Average Reward 4.61  | Actor loss: -0.36 | Critic loss: 4.17 | Entropy loss: -0.0140  | Total Loss: 3.79 | Total Steps: 150
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14385/200000  | Episode Reward: 4.0  | Average Reward 4.64  | Actor loss: 0.06 | Critic loss: 5.52 | Entropy loss: -0.0150  | Total Loss: 5.56 | Total Steps: 117
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14386/200000  | Episode Reward: -2.0  | Average Reward 4.62  | Actor loss: 0.29 | Critic loss: 2.72 | Entropy loss: -0.0088  | Total Loss: 3.00 |

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14408/200000  | Episode Reward: 4.0  | Average Reward 4.54  | Actor loss: -0.15 | Critic loss: 4.86 | Entropy loss: -0.0121  | Total Loss: 4.70 | Total Steps: 112
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14409/200000  | Episode Reward: 7.0  | Average Reward 4.55  | Actor loss: -0.19 | Critic loss: 3.62 | Entropy loss: -0.0172  | Total Loss: 3.41 | Total Steps: 155
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14410/200000  | Episode Reward: 1.0  | Average Reward 4.61  | Actor loss: -0.22 | Critic loss: 4.40 | Entropy loss: -0.0081  | Total Loss: 4.17 | Total Steps: 148
--- target colour: green, target object: sphere ---
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14434/200000  | Episode Reward: 1.0  | Average Reward 4.75  | Actor loss: -0.38 | Critic loss: 5.76 | Entropy loss: -0.0106  | Total Loss: 5.37 | Total Steps: 168
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14435/200000  | Episode Reward: 1.0  | Average Reward 4.73  | Actor loss: -0.30 | Critic loss: 4.58 | Entropy loss: -0.0140  | Total Loss: 4.27 | Total Steps: 101
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14436/200000  | Episode Reward: 4.0  | Average Reward 4.70  | Actor loss: -0.23 | Critic loss: 4.75 | Entropy loss: -0.0128  | Total Loss: 4.50 | Total Steps

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14461/200000  | Episode Reward: 10.0  | Average Reward 5.14  | Actor loss: 0.51 | Critic loss: 2.29 | Entropy loss: -0.0138  | Total Loss: 2.79 | Total Steps: 95
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14462/200000  | Episode Reward: 10.0  | Average Reward 5.14  | Actor loss: 0.35 | Critic loss: 2.89 | Entropy loss: -0.0014  | Total Loss: 3.23 | Total Steps: 18
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14463/200000  | Episode Reward: -2.0  | Average Reward 5.12  | Actor loss: -0.67 | Critic loss: 5.96 | Entropy loss: -0.0317  | Total Loss: 5.25 | Total Steps: 203
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14487/200000  | Episode Reward: 10.0  | Average Reward 5.47  | Actor loss: 0.20 | Critic loss: 1.27 | Entropy loss: -0.0011  | Total Loss: 1.46 | Total Steps: 16
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14488/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: -0.45 | Critic loss: 3.61 | Entropy loss: -0.0147  | Total Loss: 3.15 | Total Steps: 148
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14489/200000  | Episode Reward: 7.0  | Average Reward 5.50  | Actor loss: -0.03 | Critic loss: 2.49 | Entropy loss: -0.0110  | Total Loss: 2.45 | Total Steps: 136
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14490/200000  | Episo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14512/200000  | Episode Reward: 10.0  | Average Reward 5.41  | Actor loss: 0.69 | Critic loss: 2.18 | Entropy loss: -0.0024  | Total Loss: 2.86 | Total Steps: 18
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14513/200000  | Episode Reward: 10.0  | Average Reward 5.44  | Actor loss: -0.07 | Critic loss: 1.30 | Entropy loss: -0.0090  | Total Loss: 1.22 | Total Steps: 115
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14514/200000  | Episode Reward: 10.0  | Average Reward 5.44  | Actor loss: 0.33 | Critic loss: 3.80 | Entropy loss: -0.0017  | Total Loss: 4.12 | Total Steps: 18
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14515/200000  | Epi

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14538/200000  | Episode Reward: 4.0  | Average Reward 5.42  | Actor loss: -0.42 | Critic loss: 6.29 | Entropy loss: -0.0109  | Total Loss: 5.86 | Total Steps: 106
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14539/200000  | Episode Reward: 7.0  | Average Reward 5.41  | Actor loss: -0.11 | Critic loss: 3.56 | Entropy loss: -0.0126  | Total Loss: 3.44 | Total Steps: 151
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14540/200000  | Episode Reward: 4.0  | Average Reward 5.38  | Actor loss: -0.17 | Critic loss: 4.56 | Entropy loss: -0.0086  | Total Loss: 4.38 | Total Steps: 141
--- target colour: red, target object: sphere ---
Decision Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14563/200000  | Episode Reward: 0.0  | Average Reward 5.46  | Actor loss: -0.78 | Critic loss: 4.54 | Entropy loss: -0.0249  | Total Loss: 3.74 | Total Steps: 163
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14564/200000  | Episode Reward: -14.0  | Average Reward 5.39  | Actor loss: 0.27 | Critic loss: 3.22 | Entropy loss: -0.0058  | Total Loss: 3.48 | Total Steps: 313
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14565/200000  | Episode Reward: 10.0  | Average Reward 5.39  | Actor lo

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14589/200000  | Episode Reward: 7.0  | Average Reward 5.51  | Actor loss: 0.10 | Critic loss: 4.03 | Entropy loss: -0.0054  | Total Loss: 4.12 | Total Steps: 93
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14590/200000  | Episode Reward: 10.0  | Average Reward 5.53  | Actor loss: 0.06 | Critic loss: 2.73 | Entropy loss: -0.0054  | Total Loss: 2.79 | Total Steps: 92
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14591/200000  | Episode Reward: 0.0  | Average Reward 5.58  | Actor loss: -0.60 | Critic loss: 8.91 | Entropy loss: -0.0130  | Total Loss: 8.29 | Total Steps: 105
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14614/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: 0.24 | Critic loss: 2.04 | Entropy loss: -0.0014  | Total Loss: 2.27 | Total Steps: 17
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14615/200000  | Episode Reward: 1.0  | Average Reward 5.42  | Actor loss: -0.22 | Critic loss: 4.07 | Entropy loss: -0.0094  | Total Loss: 3.84 | Total Steps: 170
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14616/200000  | Episode Reward: 10.0  | Average Reward 5.42  | Actor loss: -0.07 | Critic loss: 3.80 | Entropy loss: -0.0081  | Total Loss: 3.72 | Total Steps: 100
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14639/200000  | Episode Reward: 10.0  | Average Reward 5.28  | Actor loss: 0.13 | Critic loss: 2.51 | Entropy loss: -0.0071  | Total Loss: 2.64 | Total Steps: 101
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14640/200000  | Episode Reward: 10.0  | Average Reward 5.30  | Actor loss: 0.45 | Critic loss: 2.42 | Entropy loss: -0.0015  | Total Loss: 2.87 | Total Steps: 17
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14641/200000  | Episode Reward: 10.0  | Average Reward 5.30  | Actor loss: 0.43 | Critic loss: 11.33 | Entropy loss: -0.0039  | Total Loss: 11.76 | Total Steps: 46
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14642/200000  | Episode Reward: 10.0  | Average Reward 5.30  | Actor

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14665/200000  | Episode Reward: 4.0  | Average Reward 5.02  | Actor loss: -0.17 | Critic loss: 4.85 | Entropy loss: -0.0137  | Total Loss: 4.66 | Total Steps: 145
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14666/200000  | Episode Reward: 8.0  | Average Reward 5.06  | Actor loss: -0.27 | Critic loss: 1.28 | Entropy loss: -0.0303  | Total Loss: 0.98 | Total Steps: 177
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14667/200000  | Episode Reward: -2.0  | Average Reward 5.01  | Actor loss: -0.23 | Critic loss: 5.78 | Entropy loss: -0.0231  | Total Loss: 5.53 | Total Steps: 159
--- target colour: red, target object: 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14689/200000  | Episode Reward: 10.0  | Average Reward 4.53  | Actor loss: 0.40 | Critic loss: 4.77 | Entropy loss: -0.0012  | Total Loss: 5.17 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14690/200000  | Episode Reward: -6.0  | Average Reward 4.46  | Actor loss: 1.80 | Critic loss: 6.35 | Entropy loss: -0.0100  | Total Loss: 8.15 | Total Steps: 298
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14691/200000  | Episode Reward: 3.0  | Average Reward 4.46  | Actor loss: -0.33 | Critic loss: 2.69 | Entropy loss: -0.0377  | Total Loss: 2.32 | Total Steps: 224
--- tar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14714/200000  | Episode Reward: 4.0  | Average Reward 4.47  | Actor loss: -0.20 | Critic loss: 3.61 | Entropy loss: -0.0152  | Total Loss: 3.40 | Total Steps: 120
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14715/200000  | Episode Reward: 4.0  | Average Reward 4.48  | Actor loss: -0.24 | Critic loss: 4.92 | Entropy loss: -0.0104  | Total Loss: 4.67 | Total Steps: 139
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14716/200000  | Episode Reward: 7.0  | Average Reward 4.48  | Actor loss: 0.28 | Critic loss: 3.92 | Entropy loss: -0.0142  | Total Loss: 4.19 | Total Steps: 154
--- target colour: yellow, target object: cylinder ---
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14739/200000  | Episode Reward: 8.0  | Average Reward 4.26  | Actor loss: -0.37 | Critic loss: 1.65 | Entropy loss: -0.0287  | Total Loss: 1.25 | Total Steps: 191
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14740/200000  | Episode Reward: 7.0  | Average Reward 4.28  | Actor loss: -0.19 | Critic loss: 3.67 | Entropy loss: -0.0140  | Total Loss: 3.47 | Total Steps: 133
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14741/200000  | Episode Reward: 4.0  | Average Reward 4.33  | Actor loss: -0.22 | Critic loss: 3.94 | Entropy loss: -0.0143  | Total Loss: 3.71 | Total Steps: 152
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14764/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.59 | Critic loss: 5.56 | Entropy loss: -0.0012  | Total Loss: 6.15 | Total Steps: 17
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14765/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.42 | Critic loss: 3.09 | Entropy loss: -0.0018  | Total Loss: 3.50 | Total Steps: 17
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14766/200000  | Episode Reward: 10.0  | Average Reward 4.38  | Actor loss: 0.58 | Critic loss: 3.64 | Entropy loss: -0.0017  | Total Loss: 4.22 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14790/200000  | Episode Reward: 7.0  | Average Reward 4.33  | Actor loss: -0.39 | Critic loss: 3.80 | Entropy loss: -0.0084  | Total Loss: 3.41 | Total Steps: 144
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14791/200000  | Episode Reward: -2.0  | Average Reward 4.32  | Actor loss: -0.64 | Critic loss: 8.79 | Entropy loss: -0.0164  | Total Loss: 8.13 | Total Steps: 224
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14792/200000  | Episode Reward: 10.0  | Average Reward 4.35  | Actor loss: 0.18 | Critic loss: 2.16 | Entropy loss: -0.0017  | Total Loss: 2.33 | Total Steps: 18
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14815/200000  | Episode Reward: 1.0  | Average Reward 4.38  | Actor loss: -0.08 | Critic loss: 3.95 | Entropy loss: -0.0252  | Total Loss: 3.85 | Total Steps: 172
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14816/200000  | Episode Reward: -2.0  | Average Reward 4.33  | Actor loss: -0.70 | Critic loss: 6.67 | Entropy loss: -0.0186  | Total Loss: 5.95 | Total Steps: 174
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14817/200000  | Episode Reward: 4.0  | Average Reward 4.36  | Actor loss: -0.21 | Critic loss: 2.14 | Entropy loss: -0.0077  | T

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14840/200000  | Episode Reward: -5.0  | Average Reward 4.25  | Actor loss: -0.43 | Critic loss: 7.12 | Entropy loss: -0.0141  | Total Loss: 6.68 | Total Steps: 411
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14841/200000  | Episode Reward: 1.0  | Average Reward 4.21  | Actor loss: 0.36 | Critic loss: 2.74 | Entropy loss: -0.0025  | Total Loss: 3.09 | Total Steps: 278
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14842/2

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14865/200000  | Episode Reward: -5.0  | Average Reward 4.25  | Actor loss: -0.43 | Critic loss: 5.22 | Entropy loss: -0.0215  | Total Loss: 4.76 | Total Steps: 216
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14866/200000  | Episode Reward: 10.0  | Average Reward 4.25  | Actor loss: 0.26 | Critic loss: 2.58 | Entropy loss: -0.0016  | Total Loss: 2.84 | Total Steps: 18
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14867/200000  | Episode Reward: 10.0  | Average Reward 4.32  | Actor loss: -0.01 | Critic loss: 2.89 | Entropy loss: -0.0089  | Total Loss: 2.87 | Total Steps: 119
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14891/200000  | Episode Reward: 10.0  | Average Reward 4.84  | Actor loss: 0.13 | Critic loss: 3.06 | Entropy loss: -0.0017  | Total Loss: 3.18 | Total Steps: 19
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14892/200000  | Episode Reward: 10.0  | Average Reward 4.89  | Actor loss: 0.20 | Critic loss: 1.93 | Entropy loss: -0.0014  | Total Loss: 2.12 | Total Steps: 17
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14893/200000  | Episode Reward: 10.0  | Average Reward 4.96  | Actor loss: 0.20 | Critic loss: 1.81 | Entropy loss: -0.0015  | Total Loss: 2.01 | Total Steps: 18
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14894/200000  | Episode Reward: 10.0  | Average Reward 5.03  | Acto

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14917/200000  | Episode Reward: 1.0  | Average Reward 4.92  | Actor loss: -0.74 | Critic loss: 7.20 | Entropy loss: -0.0126  | Total Loss: 6.45 | Total Steps: 161
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14918/200000  | Episode Reward: 7.0  | Average Reward 4.91  | Actor loss: -0.12 | Critic loss: 3.03 | Entropy loss: -0.0202  | Total Loss: 2.89 | Total Steps: 147
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14919/200000  | Episode Reward: 7.0  | Average Reward 4.93  | Actor loss: 0.10 | Critic loss: 1.59 | Entropy loss: -0.0289  | Total Loss: 1.66 | Total Steps: 141
--- target colour: black, target object: capsule ---
Agent in termin

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14943/200000  | Episode Reward: 4.0  | Average Reward 5.13  | Actor loss: -0.19 | Critic loss: 2.72 | Entropy loss: -0.0156  | Total Loss: 2.52 | Total Steps: 123
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14944/200000  | Episode Reward: 6.0  | Average Reward 5.23  | Actor loss: -0.16 | Critic loss: 1.79 | Entropy loss: -0.0287  | Total Loss: 1.61 | Total Steps: 155
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14945/200000  | Episode Reward: 7.0  | Average Reward 5.21  | Actor loss: 0.40 | Critic loss: 3.31 | Entropy loss: -0.0239  | Total Loss: 3.69 | Total Steps: 122
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14969/200000  | Episode Reward: 4.0  | Average Reward 5.29  | Actor loss: -0.20 | Critic loss: 4.37 | Entropy loss: -0.0076  | Total Loss: 4.16 | Total Steps: 373
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14970/200000  | Episode Reward: 3.0  | Average Reward 5.25  | Actor loss: -0.39 | Critic loss: 3.35 | Entropy loss: -0.0165  | Total Loss: 2.94 | Total Steps: 111
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14971/200000  | Episode Reward: 7.0  | Average Reward 5.26  | Actor loss: 0.78 | Critic loss: 10.59 | Entropy loss: -0.0003  | Total Loss: 11.36 | Total Steps: 253
--- target colour: yellow, target object: ca

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14994/200000  | Episode Reward: 7.0  | Average Reward 5.08  | Actor loss: -0.34 | Critic loss: 2.94 | Entropy loss: -0.0136  | Total Loss: 2.58 | Total Steps: 166
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14995/200000  | Episode Reward: 10.0  | Average Reward 5.09  | Actor loss: 0.19 | Critic loss: 3.13 | Entropy loss: -0.0015  | Total Loss: 3.31 | Total Steps: 19
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14996/200000  | Episode Reward: 7.0  | Average Reward 5.11  | Actor loss: -0.20 | Critic loss: 2.29 | Entropy loss: -0.0054  | Total Loss: 2.08 | Total Steps: 123
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 14997/200000  | Ep

Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15020/200000  | Episode Reward: -2.0  | Average Reward 5.50  | Actor loss: 0.11 | Critic loss: 3.53 | Entropy loss: -0.0004  | Total Loss: 3.64 | Total Steps: 262
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15021/200000  | Episode Reward: 4.0  | Average Reward 5.50  | Actor loss: -0.25 | Critic loss: 3.62 | Entropy loss: -0.0170  | Total Loss: 3.35 | Total Steps: 171
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15022/200000  | Episode Reward: -2.0  | Average Reward 5.46  | Actor loss: -0.27 | Critic loss: 4.38 | Entropy loss: -0.0197  | Total Loss: 4.09 | Total Steps: 163
--- target colour: red, targ

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15046/200000  | Episode Reward: -3.0  | Average Reward 5.67  | Actor loss: -0.55 | Critic loss: 5.52 | Entropy loss: -0.0273  | Total Loss: 4.94 | Total Steps: 188
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15047/200000  | Episode Reward: 10.0  | Average Reward 5.69  | Actor loss: 0.18 | Critic loss: 1.01 | Entropy loss: -0.0015  | Total Loss: 1.19 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15048/200000  | Episode Reward: 7.0  | Average Reward 5.71  | Actor loss: -0.28 | Critic loss: 4.92 | Entropy loss: -0.0131  | Total Loss: 4.63 | Total Steps: 162
--- target colour: black, target object: cube ---
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15072/200000  | Episode Reward: 10.0  | Average Reward 5.64  | Actor loss: 0.25 | Critic loss: 2.55 | Entropy loss: -0.0016  | Total Loss: 2.80 | Total Steps: 17
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15073/200000  | Episode Reward: 1.0  | Average Reward 5.60  | Actor loss: -0.33 | Critic loss: 7.03 | Entropy loss: -0.0121  | Total Loss: 6.69 | Total Steps: 112
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15074/200000  | Episode Reward: 10.0  | Average Reward 5.60  | Actor loss: 0.16 | Critic loss: 2.39 | Entropy loss: -0.0077  | Total Loss: 2.54 | Total Steps: 96
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15098/200000  | Episode Reward: 1.0  | Average Reward 5.71  | Actor loss: -0.21 | Critic loss: 4.90 | Entropy loss: -0.0215  | Total Loss: 4.67 | Total Steps: 200
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15099/200000  | Episode Reward: 4.0  | Average Reward 5.73  | Actor loss: -0.10 | Critic loss: 3.07 | Entropy loss: -0.0272  | Total Loss: 2.94 | Total Steps: 161
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15100/200000  | Episode Reward: 4.0  | Average Reward 5.73  | Actor loss: -0.46 | Critic loss: 5.79 | Entropy loss: -0.0123  | Total Loss: 5.32 | Total Steps: 162
--- target colour: green, target object: 

Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 5.94. Model has been saved-----
Training  | Episode: 15124/200000  | Episode Reward: 10.0  | Average Reward 5.94  | Actor loss: 0.23 | Critic loss: 6.49 | Entropy loss: -0.0018  | Total Loss: 6.72 | Total Steps: 19
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.0. Model has been saved-----
Training  | Episode: 15125/200000  | Episode Reward: 7.0  | Average Reward 6.00  | Actor loss: -0.09 | Critic loss: 3.11 | Entropy loss: -0.0041  | Total Loss: 3.01 | Total Steps: 134
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15126/200000  | Episode Reward: 10.0  | Average Reward 6.00  | Actor loss: 0.17 | Critic loss: 1.68 | Entropy loss: -0.0014  | Total Loss: 1

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15149/200000  | Episode Reward: 10.0  | Average Reward 5.92  | Actor loss: 1.52 | Critic loss: 9.93 | Entropy loss: -0.0058  | Total Loss: 11.44 | Total Steps: 57
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15150/200000  | Episode Reward: 10.0  | Average Reward 5.92  | Actor loss: 0.15 | Critic loss: 1.89 | Entropy loss: -0.0014  | Total Loss: 2.04 | Total Steps: 17
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15151/200000  | Episode Reward: 10.0  | Average Reward 5.96  | Actor loss: -0.15 | Critic loss: 3.84 | Entropy loss: -0.0102  | Total Loss: 3.68 | Total Steps: 106
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15152/200000  | Episode Reward: 7.0  | A

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
-----The best score for averaging previous 200 episode reward is 6.095. Model has been saved-----
Training  | Episode: 15175/200000  | Episode Reward: 7.0  | Average Reward 6.09  | Actor loss: -0.40 | Critic loss: 4.06 | Entropy loss: -0.0282  | Total Loss: 3.63 | Total Steps: 194
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15176/200000  | Episode Reward: 4.0  | Average Reward 6.07  | Actor loss: -0.20 | Critic loss: 2.22 | Entropy loss: -0.0356  | Total Loss: 1.99 | Total Steps: 212
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15177/200000  | Episode Reward: 4.0  | Average Reward 6.08  | Actor loss: -0.06 | Critic loss: 2.36 | Entropy loss: -0.0245  | Tot

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15199/200000  | Episode Reward: 8.0  | Average Reward 6.24  | Actor loss: -0.47 | Critic loss: 4.22 | Entropy loss: -0.0279  | Total Loss: 3.72 | Total Steps: 161
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15200/200000  | Episode Reward: 1.0  | Average Reward 6.22  | Actor loss: -0.27 | Critic loss: 4.28 | Entropy loss: -0.0175  | Total Loss: 3.99 | Total Steps: 122
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15201/200000  | Episode Reward: 4.0  | Average Reward 6.19  | Actor loss: -0.28 | Critic loss: 5.18 | Entropy loss: -0.0086  | Total Loss: 4.89 | Total Steps: 163
--- target colour: red, target object: cylinder ---
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15225/200000  | Episode Reward: 7.0  | Average Reward 6.20  | Actor loss: -0.24 | Critic loss: 4.87 | Entropy loss: -0.0068  | Total Loss: 4.63 | Total Steps: 130
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15226/200000  | Episode Reward: 10.0  | Average Reward 6.20  | Actor loss: 0.19 | Critic loss: 0.88 | Entropy loss: -0.0013  | Total Loss: 1.07 | Total Steps: 16
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15227/200000  | Episode Reward: 7.0  | Average Reward 6.21  | Actor loss: -0.33 | Critic loss: 3.05 | Entropy loss: -0.0079  | Total Loss: 2.71 | Total Steps: 143
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15251/200000  | Episode Reward: 10.0  | Average Reward 6.34  | Actor loss: 0.10 | Critic loss: 1.43 | Entropy loss: -0.0056  | Total Loss: 1.52 | Total Steps: 93
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15252/200000  | Episode Reward: 1.0  | Average Reward 6.30  | Actor loss: -0.44 | Critic loss: 5.95 | Entropy loss: -0.0119  | Total Loss: 5.50 | Total Steps: 156
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15253/200000  | Episode Reward: 4.0  | Average Reward 6.27  | Actor loss: -0.40 | Critic loss: 2.97 | Entropy loss: -0.0108  | Total Loss: 2.55 | Total Steps: 153
--- target colour: blue, target object: capsule ---
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15275/200000  | Episode Reward: 10.0  | Average Reward 6.50  | Actor loss: 0.23 | Critic loss: 2.08 | Entropy loss: -0.0015  | Total Loss: 2.31 | Total Steps: 18
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15276/200000  | Episode Reward: 10.0  | Average Reward 6.51  | Actor loss: -0.00 | Critic loss: 0.90 | Entropy loss: -0.0007  | Total Loss: 0.90 | Total Steps: 21
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15277/200000  | Episode Reward: -2.0  | Average Reward 6.46  | Actor loss: -0.58 | Critic loss: 6.25 | Entropy loss: -0.0161  | Total Loss: 5.65 | Total Steps: 170
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15300/200000  | Episode Reward: 4.0  | Average Reward 6.23  | Actor loss: -0.20 | Critic loss: 2.21 | Entropy loss: -0.0103  | Total Loss: 2.01 | Total Steps: 140
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15301/200000  | Episode Reward: 4.0  | Average Reward 6.20  | Actor loss: -0.20 | Critic loss: 2.89 | Entropy loss: -0.0089  | Total Loss: 2.68 | Total Steps: 96
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15302/200000  | Episode Reward: -3.0  | Average Reward 6.15  | Actor loss: -0.76 | Critic loss: 3.71 | Entropy loss: -0.0422  | Total Loss: 2.90 | Total Step

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15326/200000  | Episode Reward: 0.0  | Average Reward 5.93  | Actor loss: -0.16 | Critic loss: 6.30 | Entropy loss: -0.0325  | Total Loss: 6.10 | Total Steps: 212
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15327/200000  | Episode Reward: 1.0  | Average Reward 5.88  | Actor loss: -0.29 | Critic loss: 4.79 | Entropy loss: -0.0065  | Total Loss: 4.50 | Total Steps: 140
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15328/200000  | Episode Reward: 10.0  | Average Reward 5.93  | Actor loss: 0.22 | Critic loss: 2.60 | Entropy loss: -0.0014  | Total Loss: 2.82 | Total Steps: 17
--- target colour: blue, target object: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15351/200000  | Episode Reward: 1.0  | Average Reward 5.74  | Actor loss: -0.46 | Critic loss: 3.95 | Entropy loss: -0.0136  | Total Loss: 3.48 | Total Steps: 178
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15352/200000  | Episode Reward: -5.0  | Average Reward 5.67  | Actor loss: -0.51 | Critic loss: 4.23 | Entropy loss: -0.0242  | Total Loss: 3.70 | Total Steps: 183
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15353/200000  | Episode Reward: 1.0  | Average Reward 5.64  | Actor loss: -0.29 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15375/200000  | Episode Reward: 4.0  | Average Reward 5.41  | Actor loss: -0.11 | Critic loss: 3.80 | Entropy loss: -0.0119  | Total Loss: 3.68 | Total Steps: 159
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15376/200000  | Episode Reward: 10.0  | Average Reward 5.43  | Actor loss: 0.05 | Critic loss: 4.57 | Entropy loss: -0.0002  | Total Loss: 4.62 | Total Steps: 17
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15377/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: 0.72 | Critic loss: 1.53 | Entropy loss: -0.0026  | Total Loss: 2.24 | Total Steps: 23
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15378/200000  | Episo

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15401/200000  | Episode Reward: -2.0  | Average Reward 5.37  | Actor loss: -0.46 | Critic loss: 7.99 | Entropy loss: -0.0075  | Total Loss: 7.53 | Total Steps: 140
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15402/200000  | Episode Reward: 9.0  | Average Reward 5.36  | Actor loss: -0.30 | Critic loss: 3.24 | Entropy loss: -0.0344  | Total Loss: 2.90 | Total Steps: 184
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15403/200000  | Episode Reward: 4.0  | Average Reward 5.33  | Actor loss: -0.16 | Critic loss: 3.91 | Entropy loss: -0.0125  | Total Loss: 3.74 | Total Steps: 146
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal St

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15427/200000  | Episode Reward: 10.0  | Average Reward 5.34  | Actor loss: 0.08 | Critic loss: 3.82 | Entropy loss: -0.0027  | Total Loss: 3.90 | Total Steps: 23
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15428/200000  | Episode Reward: 7.0  | Average Reward 5.38  | Actor loss: -0.18 | Critic loss: 1.99 | Entropy loss: -0.0074  | Total Loss: 1.81 | Total Steps: 151
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15429/200000  | Episode Reward: 4.0  | Average Reward 5.38  | Actor loss: -0.36 | Critic loss: 1.81 | Entropy loss: -0.0251  | Total Loss: 1.42 | Total Steps: 240
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15451/200000  | Episode Reward: -21.0  | Average Reward 4.96  | Actor loss: -0.49 | Critic loss: 4.20 | Entropy loss: -0.0354  | Total Loss: 3.67 | Total Steps: 495
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15452/200000  | Episode Reward: 1.0  | Average Reward 4.96  | Actor loss: -0.38 | Critic loss: 3.76 | Entropy loss: -0.0151  | Total Loss: 3.36 | Total Steps: 186
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15453/200000  | Episode Reward: 1.0  | Average Reward 4.94  | Actor loss: -0.36 | Critic loss: 5.69 | Entropy loss: -0.0457  | Total Loss: 5.28 | Total Steps: 238
--- target

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15477/200000  | Episode Reward: 10.0  | Average Reward 4.74  | Actor loss: 0.66 | Critic loss: 6.68 | Entropy loss: -0.0038  | Total Loss: 7.34 | Total Steps: 23
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15478/200000  | Episode Reward: -6.0  | Average Reward 4.73  | Actor loss: 0.02 | Critic loss: 5.97 | Entropy loss: -0.0011  | Total Loss: 5.98 | Total Steps: 305
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15479/200000  | Episode Reward: 10.0  | Average Reward 4.73  | Actor loss: 0.34 | Critic loss: 2.27 | Entropy loss: -0.0014  | Total Loss: 2.61 | Total Steps: 17
--- target colour: green, target object: prism ---
Decisio

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15503/200000  | Episode Reward: 4.0  | Average Reward 5.03  | Actor loss: -0.63 | Critic loss: 1.94 | Entropy loss: -0.0186  | Total Loss: 1.29 | Total Steps: 144
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15504/200000  | Episode Reward: 1.0  | Average Reward 5.03  | Actor loss: -0.24 | Critic loss: 4.28 | Entropy loss: -0.0056  | Total Loss: 4.03 | Total Steps: 136
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15505/200000  | Episode Reward: -5.0  | Average Reward 4.97  | Actor loss: -0.52 | Critic loss: 7.59 | 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15529/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: -0.24 | Critic loss: 2.57 | Entropy loss: -0.0110  | Total Loss: 2.32 | Total Steps: 166
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15530/200000  | Episode Reward: -2.0  | Average Reward 5.11  | Actor loss: -0.78 | Critic loss: 6.57 | Entropy loss: -0.0198  | Total Loss: 5.78 | Total Steps: 156
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15531/200000  | Episode Reward: 10.0  | Average Reward 5.13  | Actor loss: 0.01 | Critic loss: 1.03 | Entropy loss: -0.0006  | Total Loss: 1.04 | Total Steps: 21
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15554/200000  | Episode Reward: 1.0  | Average Reward 5.28  | Actor loss: -0.21 | Critic loss: 5.22 | Entropy loss: -0.0139  | Total Loss: 4.99 | Total Steps: 151
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15555/200000  | Episode Reward: 10.0  | Average Reward 5.32  | Actor loss: -0.03 | Critic loss: 0.89 | Entropy loss: -0.0023  | Total Loss: 0.86 | Total Steps: 124
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15556/200000  | Episode Reward: -8.0  | Average Reward 5.30  | Actor loss: -0.54 | Critic loss: 7.44 | Entropy loss: -0.0128  

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15580/200000  | Episode Reward: 10.0  | Average Reward 5.51  | Actor loss: -0.15 | Critic loss: 2.68 | Entropy loss: -0.0235  | Total Loss: 2.51 | Total Steps: 160
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15581/200000  | Episode Reward: 4.0  | Average Reward 5.49  | Actor loss: -0.09 | Critic loss: 3.03 | Entropy loss: -0.0189  | Total Loss: 2.92 | Total Steps: 123
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15582/200000  | Episode Reward: 4.0  | Average Reward 5.46  | Actor loss: -0.60 | Critic loss: 3.91 | Entropy loss: -0.0127  | Total Loss: 3.30 | Total Steps: 130
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15605/200000  | Episode Reward: 10.0  | Average Reward 5.21  | Actor loss: 0.22 | Critic loss: 2.36 | Entropy loss: -0.0016  | Total Loss: 2.58 | Total Steps: 18
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15606/200000  | Episode Reward: 1.0  | Average Reward 5.20  | Actor loss: -0.37 | Critic loss: 3.55 | Entropy loss: -0.0137  | Total Loss: 3.17 | Total Steps: 167
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15607/200000  | Episode Reward: 3.0  | Average Reward 5.17  | Actor loss: -0.02 | Critic loss: 3.53 | Entropy loss: -0.0177  | Total Loss: 3.49 | Total Steps: 154
--- target colour: black, target object: cube ---
Decision Step re

Training  | Episode: 15630/200000  | Episode Reward: -11.0  | Average Reward 5.16  | Actor loss: -0.36 | Critic loss: 7.71 | Entropy loss: -0.0170  | Total Loss: 7.33 | Total Steps: 242
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15631/200000  | Episode Reward: 10.0  | Average Reward 5.17  | Actor loss: 0.47 | Critic loss: 6.98 | Entropy loss: -0.0019  | Total Loss: 7.45 | Total Steps: 24
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15632/200000  | Episode Reward: 7.0  | Average Reward 5.22  | Actor loss: 0.10 | Critic loss: 1.89 | Entropy loss: -0.0230  | Total Loss: 1.96 | Total Steps: 166
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15633/200000  | Episode Reward: 7.0  | Average Reward 5.22  | Actor 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15657/200000  | Episode Reward: -2.0  | Average Reward 5.63  | Actor loss: -0.47 | Critic loss: 4.17 | Entropy loss: -0.0173  | Total Loss: 3.68 | Total Steps: 168
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15658/200000  | Episode Reward: 10.0  | Average Reward 5.69  | Actor loss: 0.03 | Critic loss: 1.35 | Entropy loss: -0.0002  | Total Loss: 1.38 | Total Steps: 17
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15659/200000  | Episode Reward: 4.0  | Average Reward 5.69  | Actor loss: -0.16 | Critic loss: 7.75 | Entropy loss: -0.0076  | Total Loss: 7.59 | Total Steps: 105
--- target colour: green, target object: prism --

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15683/200000  | Episode Reward: 10.0  | Average Reward 5.83  | Actor loss: 0.10 | Critic loss: 1.87 | Entropy loss: -0.0017  | Total Loss: 1.96 | Total Steps: 18
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15684/200000  | Episode Reward: 10.0  | Average Reward 5.83  | Actor loss: 0.43 | Critic loss: 1.36 | Entropy loss: -0.0097  | Total Loss: 1.79 | Total Steps: 94
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15685/200000  | Episode Reward: 10.0  | Average Reward 5.83  | Actor loss: -0.04 | Critic loss: 0.62 | Entropy loss: -0.0009  | Total Loss: 0.58 | Total Steps: 21
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15686/200000  | Episode Reward: 10.0  | Average Reward 5.83  | Acto

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15708/200000  | Episode Reward: 4.0  | Average Reward 5.63  | Actor loss: -0.19 | Critic loss: 1.98 | Entropy loss: -0.0257  | Total Loss: 1.76 | Total Steps: 158
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15709/200000  | Episode Reward: 10.0  | Average Reward 5.63  | Actor loss: -0.07 | Critic loss: 1.43 | Entropy loss: -0.0097  | Total Loss: 1.35 | Total Steps: 140
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15710/200000  | Episode Reward: 1.0  | Average Reward 5.58  | Actor loss: -0.21 | Critic loss: 5.38 | Entropy loss: -0.0087  | Total Loss: 5.17 | Total Steps: 174
--- target colour: black, target object: cube ---
Agent in te

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15734/200000  | Episode Reward: 4.0  | Average Reward 5.43  | Actor loss: -0.29 | Critic loss: 2.54 | Entropy loss: -0.0121  | Total Loss: 2.23 | Total Steps: 185
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15735/200000  | Episode Reward: 1.0  | Average Reward 5.43  | Actor loss: -0.24 | Critic loss: 5.93 | Entropy loss: -0.0155  | Total Loss: 5.68 | Total Steps: 105
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15736/200000  | Episode Reward: 4.0  | Average Reward 5.43  | Actor loss: -0.21 | Critic loss: 2.13 | Entropy loss: -0.0159  | Total Loss: 1.91 | Total Steps: 118
--- target colou

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15759/200000  | Episode Reward: 10.0  | Average Reward 5.57  | Actor loss: -0.13 | Critic loss: 0.87 | Entropy loss: -0.0121  | Total Loss: 0.73 | Total Steps: 124
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15760/200000  | Episode Reward: 10.0  | Average Reward 5.59  | Actor loss: 0.75 | Critic loss: 5.63 | Entropy loss: -0.0011  | Total Loss: 6.38 | Total Steps: 18
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15761/200000  | Episode Reward: -2.0  | Average Reward 5.54  | Actor loss: -0.45 | Critic loss: 6.82 | Entropy loss: -0.0087  | Total Loss: 6.36 | Total Steps: 163
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step re

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15784/200000  | Episode Reward: 10.0  | Average Reward 5.41  | Actor loss: 0.57 | Critic loss: 1.71 | Entropy loss: -0.0279  | Total Loss: 2.25 | Total Steps: 137
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15785/200000  | Episode Reward: 1.0  | Average Reward 5.37  | Actor loss: 1.41 | Critic loss: 2.58 | Entropy loss: -0.0027  | Total Loss: 3.99 | Total Steps: 261
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15786/200000  | Episode Reward: 10.0  | Average Reward 5.39  | Actor loss: 0.18 | Critic loss: 4.19 | Entropy loss: -0.0168  | Total Loss: 4.36 | Total Steps: 103
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decisio

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15809/200000  | Episode Reward: 3.0  | Average Reward 5.43  | Actor loss: -0.51 | Critic loss: 2.75 | Entropy loss: -0.0289  | Total Loss: 2.22 | Total Steps: 202
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15810/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: -0.08 | Critic loss: 0.75 | Entropy loss: -0.0155  | Total Loss: 0.66 | Total Steps: 120
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15811/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: 0.54 | Critic loss: 1.13 | Entropy loss: -0.0031  | Total Loss: 1.66 | Total Steps: 18
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision S

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15834/200000  | Episode Reward: 10.0  | Average Reward 5.37  | Actor loss: 0.05 | Critic loss: 2.71 | Entropy loss: -0.0003  | Total Loss: 2.76 | Total Steps: 17
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15835/200000  | Episode Reward: -8.0  | Average Reward 5.28  | Actor loss: -0.43 | Critic loss: 8.50 | Entropy loss: -0.0175  | Total Loss: 8.05 | Total Steps: 242
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15836/200000  | Episode Reward: 1.0  | Average Reward 5.25  | Actor loss: -0.46 | Critic loss: 4.57 | Entropy loss: -0.0320  | T

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15860/200000  | Episode Reward: 1.0  | Average Reward 5.13  | Actor loss: -0.04 | Critic loss: 5.03 | Entropy loss: -0.0137  | Total Loss: 4.98 | Total Steps: 215
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15861/200000  | Episode Reward: 10.0  | Average Reward 5.13  | Actor loss: 0.55 | Critic loss: 8.06 | Entropy loss: -0.0021  | Total Loss: 8.61 | Total Steps: 24
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15862/200000  | Episode Reward: 10.0  | Average Reward 5.18  | Actor loss: -0.08 | Critic loss: 3.83 | Entropy loss: -0.0071  | Total Loss: 3.74 | Total Steps: 117
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15886/200000  | Episode Reward: 4.0  | Average Reward 5.04  | Actor loss: -0.15 | Critic loss: 2.22 | Entropy loss: -0.0113  | Total Loss: 2.06 | Total Steps: 139
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15887/200000  | Episode Reward: 4.0  | Average Reward 5.06  | Actor loss: -0.35 | Critic loss: 6.85 | Entropy loss: -0.0151  | Total Loss: 6.49 | Total Steps: 116
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15888/200000  | Episode Reward: 10.0  | Average Reward 5.09  | Actor loss: 0.02 | Critic loss: 1.60 | Entropy loss: -0.0011  | Total Loss: 1.62 | Total Steps: 21
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal S

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15913/200000  | Episode Reward: 7.0  | Average Reward 5.41  | Actor loss: -0.19 | Critic loss: 2.37 | Entropy loss: -0.0246  | Total Loss: 2.15 | Total Steps: 137
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15914/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: 0.36 | Critic loss: 2.43 | Entropy loss: -0.0023  | Total Loss: 2.79 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15915/200000  | Episode Reward: 4.0  | Average Reward 5.50  | Actor loss: -0.53 | Critic loss: 3.20 | Entropy loss: -0.0110  | Total Loss: 2.66 | Total Steps: 108
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15939/200000  | Episode Reward: 4.0  | Average Reward 5.51  | Actor loss: -0.24 | Critic loss: 3.55 | Entropy loss: -0.0194  | Total Loss: 3.29 | Total Steps: 166
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15940/200000  | Episode Reward: 10.0  | Average Reward 5.57  | Actor loss: 0.18 | Critic loss: 2.15 | Entropy loss: -0.0055  | Total Loss: 2.33 | Total Steps: 92
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15941/200000  | Episode Reward: 4.0  | Average Reward 5.55  | Actor loss: -0.48 | Critic loss: 3.54 | Entropy loss: -0.0142  | Total Loss: 3.04 | Total Steps: 152
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15964/200000  | Episode Reward: 1.0  | Average Reward 5.46  | Actor loss: -0.55 | Critic loss: 7.80 | Entropy loss: -0.0131  | Total Loss: 7.24 | Total Steps: 188
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15965/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: 0.01 | Critic loss: 0.73 | Entropy loss: -0.0010  | Total Loss: 0.73 | Total Steps: 21
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15966/200000  | Episode Reward: 10.0  | Average Reward 5.46  | Actor loss: -0.19 | Critic loss: 2.54 | Entropy loss: -0.0067  | Total Loss: 2.35 | Total Steps: 165
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epis

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15990/200000  | Episode Reward: -2.0  | Average Reward 5.59  | Actor loss: -0.42 | Critic loss: 5.58 | Entropy loss: -0.0157  | Total Loss: 5.14 | Total Steps: 151
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15991/200000  | Episode Reward: 7.0  | Average Reward 5.70  | Actor loss: -0.11 | Critic loss: 1.93 | Entropy loss: -0.0110  | Total Loss: 1.81 | Total Steps: 184
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 15992/200000  | Episode Reward: 7.0  | Average Reward 5.71  | Actor loss: -0.01 | Critic loss: 6.46 | Entropy loss: -0.0060  | Total Loss: 6.43 | Total Steps: 92
--- target colour: yellow, target object: cylin

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16016/200000  | Episode Reward: 0.0  | Average Reward 5.83  | Actor loss: -0.75 | Critic loss: 4.60 | Entropy loss: -0.0258  | Total Loss: 3.83 | Total Steps: 167
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16017/200000  | Episode Reward: 4.0  | Average Reward 5.79  | Actor loss: -0.45 | Critic loss: 3.97 | Entropy loss: -0.0114  | Total Loss: 3.50 | Total Steps: 152
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16018/200000  | Episode Reward: 7.0  | Average Reward 5.81  | Actor loss: -0.19 | Critic loss: 4.96 | Entropy loss: -0.0057  | Total Loss: 4.76 | Total Steps: 94
--- target colour: black, target object: prism ---
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16042/200000  | Episode Reward: 4.0  | Average Reward 6.16  | Actor loss: -0.31 | Critic loss: 3.71 | Entropy loss: -0.0111  | Total Loss: 3.39 | Total Steps: 134
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16043/200000  | Episode Reward: 9.0  | Average Reward 6.15  | Actor loss: 0.04 | Critic loss: 1.70 | Entropy loss: -0.0203  | Total Loss: 1.72 | Total Steps: 120
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16044/200000  | Episode Reward: 10.0  | Average Reward 6.17  | Actor loss: 0.42 | Critic loss: 2.61 | Entropy loss: -0.0015  | Total Loss: 3.03 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step re

Training  | Episode: 16067/200000  | Episode Reward: -2.0  | Average Reward 6.00  | Actor loss: -0.61 | Critic loss: 6.27 | Entropy loss: -0.0168  | Total Loss: 5.64 | Total Steps: 155
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Max Step Reward: -10
Step: 500
Training  | Episode: 16068/200000  | Episode Reward: -23.0  | Average Reward 5.88  | Actor loss: -1.27 | Critic loss: 9.87 | Entropy loss: -0.0329  | Total Loss: 8.57 | Total Steps: 500
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16069/200000  | Episode Reward: 10.0  | Average Reward 5.95  | Actor loss: 0.06 | Critic loss: 9.08 | Entropy loss: -0.0003  | Total Loss: 9.14 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16094/200000  | Episode Reward: 1.0  | Average Reward 5.99  | Actor loss: 0.57 | Critic loss: 16.60 | Entropy loss: -0.0015  | Total Loss: 17.18 | Total Steps: 267
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16095/200000  | Episode Reward: -3.0  | Average Reward 5.92  | Actor loss: -0.74 | Critic loss: 4.08 | Entropy loss: -0.0326  | Total Loss: 3.31 | Total Steps: 194
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16096/200000  | Episode Reward: 1.0  | Average Reward 5.91  | Actor loss: -0.64 | Critic lo

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16120/200000  | Episode Reward: 7.0  | Average Reward 5.96  | Actor loss: 0.10 | Critic loss: 3.92 | Entropy loss: -0.0066  | Total Loss: 4.01 | Total Steps: 96
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16121/200000  | Episode Reward: 3.0  | Average Reward 5.97  | Actor loss: -0.53 | Critic loss: 2.38 | Entropy loss: -0.0411  | Total Loss: 1.80 | Total Steps: 218
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16122/200000  | Episode Reward: 7.0  | Average Reward 5.99  | Actor loss: -0.04 | Critic loss: 1.82 | Entropy loss: -0.0100  | Total Loss: 1.77 | Total Steps: 130
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16146/200000  | Episode Reward: 4.0  | Average Reward 5.82  | Actor loss: -0.51 | Critic loss: 2.67 | Entropy loss: -0.0159  | Total Loss: 2.15 | Total Steps: 107
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16147/200000  | Episode Reward: 1.0  | Average Reward 5.78  | Actor loss: -0.26 | Critic loss: 4.95 | Entropy loss: -0.0078  | Total Loss: 4.68 | Total Steps: 142
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16148/200000  | Episode Reward: 10.0  | Average Reward 5.80  | Actor loss: 0.05 | Critic loss: 2.62 | Entropy loss: -0.0002  | Total Loss: 2.67 | Total Steps: 17
--- target colour: black, target object: prism ---
Agent in terminal 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16172/200000  | Episode Reward: 7.0  | Average Reward 6.03  | Actor loss: 0.17 | Critic loss: 3.34 | Entropy loss: -0.0116  | Total Loss: 3.50 | Total Steps: 94
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16173/200000  | Episode Reward: 10.0  | Average Reward 6.03  | Actor loss: -0.01 | Critic loss: 3.92 | Entropy loss: -0.0052  | Total Loss: 3.90 | Total Steps: 96
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16174/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor loss: 0.13 | Critic loss: 2.34 | Entropy loss: -0.0012  | Total Loss: 2.47 | Total Steps: 17
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16175/200000  | Episode Reward: 10.0  | Average R

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16199/200000  | Episode Reward: 1.0  | Average Reward 6.17  | Actor loss: -0.30 | Critic loss: 5.91 | Entropy loss: -0.0051  | Total Loss: 5.61 | Total Steps: 163
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16200/200000  | Episode Reward: -2.0  | Average Reward 6.13  | Actor loss: -0.33 | Critic loss: 3.29 | Entropy loss: -0.0153  | Total Loss: 2.95 | Total Steps: 224
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16201/200000  | Episode Reward: 0.0  | Average Reward 6.10  | Actor loss: -0.69 | Critic loss: 4.10 | Ent

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16225/200000  | Episode Reward: 10.0  | Average Reward 6.16  | Actor loss: -0.22 | Critic loss: 12.56 | Entropy loss: -0.0021  | Total Loss: 12.34 | Total Steps: 47
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16226/200000  | Episode Reward: 10.0  | Average Reward 6.16  | Actor loss: 0.18 | Critic loss: 1.95 | Entropy loss: -0.0011  | Total Loss: 2.13 | Total Steps: 16
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16227/200000  | Episode Reward: 10.0  | Average Reward 6.16  | Actor loss: -0.32 | Critic loss: 3.40 | Entropy loss: -0.0120  | Total Loss: 3.07 | Total Steps: 148
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  |

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16251/200000  | Episode Reward: 10.0  | Average Reward 6.17  | Actor loss: 0.07 | Critic loss: 1.72 | Entropy loss: -0.0017  | Total Loss: 1.79 | Total Steps: 18
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16252/200000  | Episode Reward: 10.0  | Average Reward 6.19  | Actor loss: 0.37 | Critic loss: 6.45 | Entropy loss: -0.0015  | Total Loss: 6.82 | Total Steps: 19
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16253/200000  | Episode Reward: 1.0  | Average Reward 6.19  | Actor loss: -0.55 | Critic loss: 5.73 | Entropy loss: -0.0219  | Total Loss: 5.16 | Total Steps: 149
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in term

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16276/200000  | Episode Reward: 10.0  | Average Reward 6.13  | Actor loss: 0.15 | Critic loss: 3.28 | Entropy loss: -0.0013  | Total Loss: 3.43 | Total Steps: 19
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16277/200000  | Episode Reward: 4.0  | Average Reward 6.13  | Actor loss: -0.19 | Critic loss: 4.61 | Entropy loss: -0.0061  | Total Loss: 4.41 | Total Steps: 100
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16278/200000  | Episode Reward: 1.0  | Average Reward 6.09  | Actor loss: -0.21 | Critic loss: 5.27 | Entropy loss: -0.0189  | Total Loss: 5.04 | Total Steps: 149
--- target colour: red, target object: cylinder ---
Decision St

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16301/200000  | Episode Reward: 10.0  | Average Reward 5.94  | Actor loss: 0.25 | Critic loss: 2.14 | Entropy loss: -0.0015  | Total Loss: 2.39 | Total Steps: 17
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16302/200000  | Episode Reward: 10.0  | Average Reward 5.94  | Actor loss: 0.03 | Critic loss: 1.28 | Entropy loss: -0.0018  | Total Loss: 1.31 | Total Steps: 19
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16303/200000  | Episode Reward: 7.0  | Average Reward 5.92  | Actor loss: -0.10 | Critic loss: 2.31 | Entropy loss: -0.0030  | Total Loss: 2.21 | Total Steps: 148
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16327/200000  | Episode Reward: 10.0  | Average Reward 5.80  | Actor loss: 0.30 | Critic loss: 5.41 | Entropy loss: -0.0011  | Total Loss: 5.71 | Total Steps: 16
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16328/200000  | Episode Reward: 10.0  | Average Reward 5.80  | Actor loss: 0.37 | Critic loss: 3.02 | Entropy loss: -0.0012  | Total Loss: 3.38 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16329/200000  | Episode Reward: 7.0  | Average Reward 5.80  | Actor loss: -0.31 | Critic loss: 4.00 | Entropy loss: -0.0138  | Total Loss: 3.68 | Total Steps: 179
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16330/200000  | Episode Reward: 10.0  | Aver

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16352/200000  | Episode Reward: -11.0  | Average Reward 5.59  | Actor loss: -0.76 | Critic loss: 8.15 | Entropy loss: -0.0223  | Total Loss: 7.36 | Total Steps: 246
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16353/200000  | Episode Reward: 10.0  | Average Reward 5.64  | Actor loss: 0.09 | Critic loss: 2.00 | Entropy loss: -0.0104  | Total Loss: 2.08 | Total Steps: 143
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16354/200000  | Episode Reward: 10.0  | Average Reward 5.64  | Actor loss: 0.12 | Critic loss: 6.79 | Entropy loss: -0.0003  | Total Loss: 6.91 | Total Steps: 17
--- target colour: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16377/200000  | Episode Reward: -11.0  | Average Reward 5.32  | Actor loss: -0.03 | Critic loss: 4.75 | Entropy loss: -0.0018  | Total Loss: 4.72 | Total Steps: 284
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16378/200000  | Episode Reward: -3.0  | Average Reward 5.29  | Actor loss: -0.51 | Critic loss: 4.90 | Entropy loss: -0.0292  | Total Loss: 4.35 | Total Steps: 169
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16379/200000  | Episode Reward: 10.0  | Average Reward 5.29  | A

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16402/200000  | Episode Reward: 1.0  | Average Reward 5.28  | Actor loss: -0.56 | Critic loss: 5.57 | Entropy loss: -0.0166  | Total Loss: 4.99 | Total Steps: 167
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16403/200000  | Episode Reward: 7.0  | Average Reward 5.26  | Actor loss: -0.13 | Critic loss: 4.48 | Entropy loss: -0.0042  | Total Loss: 4.34 | Total Steps: 91
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16404/200000  | Episode Reward: 4.0  | Average Reward 5.28  | Actor loss: -0.16 | Critic loss: 4.08 | Entropy loss: -0.0106  | Total Loss: 3.91 | Total Steps: 176
--- target colour: yellow, target object: c

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16428/200000  | Episode Reward: 7.0  | Average Reward 5.22  | Actor loss: -0.40 | Critic loss: 4.27 | Entropy loss: -0.0062  | Total Loss: 3.86 | Total Steps: 115
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16429/200000  | Episode Reward: -9.0  | Average Reward 5.12  | Actor loss: -0.04 | Critic loss: 3.10 | Entropy loss: -0.0095  | Total Loss: 3.05 | Total Steps: 342
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16430/200000  | Episode Reward: 7.0  | Average Reward 5.14  | Actor loss: -0.20 | Critic loss: 3.52 | Entropy loss: -0.0164  | Total Loss: 3.31

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16453/200000  | Episode Reward: 3.0  | Average Reward 4.98  | Actor loss: -0.36 | Critic loss: 6.21 | Entropy loss: -0.0225  | Total Loss: 5.83 | Total Steps: 157
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16454/200000  | Episode Reward: 1.0  | Average Reward 4.96  | Actor loss: -0.31 | Critic loss: 2.86 | Entropy loss: -0.0325  | Total Loss: 2.51 | Total Steps: 191
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16455/200000  | Episode Reward: 4.0  | Average Reward 4.96  | Actor loss: -0.06 | Critic loss: 2.33 | Entropy loss: -0.0086  | Total Loss: 2.26 | Total Steps: 98
--- target colour: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16479/200000  | Episode Reward: 7.0  | Average Reward 5.11  | Actor loss: -0.24 | Critic loss: 8.63 | Entropy loss: -0.0024  | Total Loss: 8.39 | Total Steps: 74
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16480/200000  | Episode Reward: 7.0  | Average Reward 5.12  | Actor loss: 0.00 | Critic loss: 2.99 | Entropy loss: -0.0036  | Total Loss: 2.99 | Total Steps: 158
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16481/200000  | Episode Reward: 5.0  | Average Reward 5.09  | Actor loss: -0.49 | Critic loss: 3.34 | Entropy loss: -0.0336  | Total Loss: 2.82 | Total Steps: 204
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16505/200000  | Episode Reward: 10.0  | Average Reward 5.20  | Actor loss: 0.22 | Critic loss: 2.90 | Entropy loss: -0.0041  | Total Loss: 3.11 | Total Steps: 91
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16506/200000  | Episode Reward: 10.0  | Average Reward 5.23  | Actor loss: 0.06 | Critic loss: 1.89 | Entropy loss: -0.0055  | Total Loss: 1.95 | Total Steps: 95
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16507/200000  | Episode Reward: 10.0  | Average Reward 5.29  | Actor loss: 0.52 | Critic loss: 1.84 | Entropy loss: -0.0012  | Total Loss: 2.35 | Total Steps: 18
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16508/200000  | Episode Reward: 10.0  | Average Reward 5.33  | Actor loss: 0

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16531/200000  | Episode Reward: 4.0  | Average Reward 5.20  | Actor loss: -0.23 | Critic loss: 3.96 | Entropy loss: -0.0071  | Total Loss: 3.72 | Total Steps: 113
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16532/200000  | Episode Reward: 10.0  | Average Reward 5.25  | Actor loss: 0.11 | Critic loss: 2.12 | Entropy loss: -0.0080  | Total Loss: 2.23 | Total Steps: 92
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16533/200000  | Episode Reward: 10.0  | Average Reward 5.25  | Actor loss: 0.12 | Critic loss: 3.57 | Entropy loss: -0.0096  | Total Loss: 3.69 | Total Steps: 115
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16534/200000  | Episod

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16558/200000  | Episode Reward: 4.0  | Average Reward 5.42  | Actor loss: -0.29 | Critic loss: 4.50 | Entropy loss: -0.0139  | Total Loss: 4.19 | Total Steps: 106
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16559/200000  | Episode Reward: 7.0  | Average Reward 5.43  | Actor loss: -0.11 | Critic loss: 2.80 | Entropy loss: -0.0254  | Total Loss: 2.66 | Total Steps: 141
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16560/200000  | Episode Reward: 10.0  | Average Reward 5.48  | Actor loss: 0.84 | Critic loss: 2.65 | Entropy loss: -0.0018  | Total Loss: 3.49 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16584/200000  | Episode Reward: 10.0  | Average Reward 5.71  | Actor loss: 0.10 | Critic loss: 4.28 | Entropy loss: -0.0003  | Total Loss: 4.38 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16585/200000  | Episode Reward: 10.0  | Average Reward 5.83  | Actor loss: 0.17 | Critic loss: 4.95 | Entropy loss: -0.0045  | Total Loss: 5.12 | Total Steps: 78
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16586/200000  | Episode Reward: 10.0  | Average Reward 5.84  | Actor loss: 0.25 | Critic loss: 2.94 | Entropy loss: -0.0090  | Total Loss: 3.18 | Total Steps: 92
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16587/200000  | Episode Reward: 10.0  | Average Reward 5.84  | Actor loss:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16610/200000  | Episode Reward: 7.0  | Average Reward 5.88  | Actor loss: -0.13 | Critic loss: 1.24 | Entropy loss: -0.0321  | Total Loss: 1.08 | Total Steps: 192
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16611/200000  | Episode Reward: 4.0  | Average Reward 5.86  | Actor loss: -0.51 | Critic loss: 3.20 | Entropy loss: -0.0099  | Total Loss: 2.68 | Total Steps: 114
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16612/200000  | Episode Reward: 10.0  | Average Reward 5.86  | Actor loss: 0.01 | Critic loss: 1.18 | Entropy loss: -0.0010  | Total Loss: 1.19 | Total Steps: 21
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision S

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16635/200000  | Episode Reward: 1.0  | Average Reward 5.64  | Actor loss: -0.18 | Critic loss: 2.74 | Entropy loss: -0.0207  | Total Loss: 2.54 | Total Steps: 131
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16636/200000  | Episode Reward: -14.0  | Average Reward 5.54  | Actor loss: -0.60 | Critic loss: 6.10 | Entropy loss: -0.0122  | Total Loss: 5.48 | Total Steps: 430
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16637/200000 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16660/200000  | Episode Reward: 7.0  | Average Reward 5.61  | Actor loss: 0.14 | Critic loss: 2.20 | Entropy loss: -0.0055  | Total Loss: 2.33 | Total Steps: 91
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16661/200000  | Episode Reward: 7.0  | Average Reward 5.62  | Actor loss: 0.00 | Critic loss: 3.44 | Entropy loss: -0.0143  | Total Loss: 3.43 | Total Steps: 134
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16662/200000  | Episode Reward: 10.0  | Average Reward 5.66  | Actor loss: 0.14 | Critic loss: 1.50 | Entropy loss: -0.0053  | Total Loss: 1.63 | Total Steps: 90
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termina

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16686/200000  | Episode Reward: 4.0  | Average Reward 5.58  | Actor loss: -0.33 | Critic loss: 2.12 | Entropy loss: -0.0105  | Total Loss: 1.77 | Total Steps: 106
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16687/200000  | Episode Reward: 10.0  | Average Reward 5.65  | Actor loss: 0.26 | Critic loss: 5.86 | Entropy loss: -0.0015  | Total Loss: 6.12 | Total Steps: 23
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16688/200000  | Episode Reward: 10.0  | Average Reward 5.68  | Actor loss: 0.18 | Critic loss: 1.51 | Entropy loss: -0.0011  | Total Loss: 1.69 | Total Steps: 17
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16712/200000  | Episode Reward: -5.0  | Average Reward 5.47  | Actor loss: -0.85 | Critic loss: 6.55 | Entropy loss: -0.0262  | Total Loss: 5.67 | Total Steps: 183
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16713/200000  | Episode Reward: 9.0  | Average Reward 5.51  | Actor loss: -0.13 | Critic loss: 1.85 | Entropy loss: -0.0076  | Total Loss: 1.70 | Total Steps: 102
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16714/200000  | Episode Reward: 10.0  | Average Reward 5.51  | Actor loss: 0.35 | Critic loss: 11.15 | Entropy loss: -0.0019  | Total Loss: 11.50 | Total Steps: 54
--- target colour: yellow, target object: cylinder ---
Decision S

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16738/200000  | Episode Reward: 1.0  | Average Reward 5.50  | Actor loss: -0.56 | Critic loss: 5.93 | Entropy loss: -0.0257  | Total Loss: 5.34 | Total Steps: 205
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16739/200000  | Episode Reward: 10.0  | Average Reward 5.50  | Actor loss: 0.03 | Critic loss: 2.61 | Entropy loss: -0.0002  | Total Loss: 2.64 | Total Steps: 17
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16740/200000  | Episode Reward: 3.0  | Average Reward 5.50  | Actor loss: -0.43 | Critic loss: 3.50 | Entropy loss: -0.0279  | Total Loss: 3.04 | Total Steps: 241
--- target colour: blue, target object: capsule ---
Decision Step rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16764/200000  | Episode Reward: 3.0  | Average Reward 5.29  | Actor loss: -0.15 | Critic loss: 5.06 | Entropy loss: -0.0143  | Total Loss: 4.89 | Total Steps: 140
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16765/200000  | Episode Reward: 4.0  | Average Reward 5.25  | Actor loss: -0.22 | Critic loss: 3.60 | Entropy loss: -0.0165  | Total Loss: 3.36 | Total Steps: 136
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16766/200000  | Episode Reward: 7.0  | Average Reward 5.25  | Actor loss: 0.04 | Critic loss: 4.24 | Entropy loss: -0.0044  | Total Loss: 4.27 | Total Steps: 100
--- target colour: red, target object: sphere ---
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16790/200000  | Episode Reward: 6.0  | Average Reward 5.08  | Actor loss: -0.09 | Critic loss: 4.26 | Entropy loss: -0.0344  | Total Loss: 4.13 | Total Steps: 203
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16791/200000  | Episode Reward: -2.0  | Average Reward 5.04  | Actor loss: -0.45 | Critic loss: 5.26 | Entropy loss: -0.0164  | Total Loss: 4.79 | Total Steps: 239
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16792/200000  | Episode Reward: -5.0  | Average Reward 5.01  | Actor loss: -0.38 | Critic loss: 8.32 | Entr

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16815/200000  | Episode Reward: 1.0  | Average Reward 4.86  | Actor loss: -0.18 | Critic loss: 6.61 | Entropy loss: -0.0106  | Total Loss: 6.42 | Total Steps: 112
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16816/200000  | Episode Reward: 7.0  | Average Reward 4.84  | Actor loss: -0.29 | Critic loss: 3.30 | Entropy loss: -0.0207  | Total Loss: 2.99 | Total Steps: 182
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16817/200000  | Episode Reward: 7.0  | Average Reward 4.83  | Actor loss: -0.23 | Critic loss: 1.91 | Entropy loss: -0.0201  | Total Loss: 1.65 | Total Steps: 156
--- target colour: blue, target object: cube ---
Agent in termi

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16840/200000  | Episode Reward: 10.0  | Average Reward 5.08  | Actor loss: 0.24 | Critic loss: 1.14 | Entropy loss: -0.0181  | Total Loss: 1.35 | Total Steps: 127
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16841/200000  | Episode Reward: -2.0  | Average Reward 5.03  | Actor loss: -0.32 | Critic loss: 7.08 | Entropy loss: -0.0077  | Total Loss: 6.75 | Total Steps: 181
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16842/200000  | Episode Reward: 10.0  | Average Reward 5.03  | Actor loss: 0.23 | Critic loss: 1.89 | Entropy loss: -0.0014  | Total Loss: 2.12 | Total Steps: 17
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16866/200000  | Episode Reward: 9.0  | Average Reward 5.26  | Actor loss: -0.09 | Critic loss: 4.21 | Entropy loss: -0.0271  | Total Loss: 4.10 | Total Steps: 173
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16867/200000  | Episode Reward: 1.0  | Average Reward 5.22  | Actor loss: -0.54 | Critic loss: 5.89 | Entropy loss: -0.0325  | Total Loss: 5.32 | Total Steps: 201
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16868/200000  | Episode Reward: 10.0  | Average Reward 5.22  | Actor loss: 0.25 | Critic loss: 2.24 | Entropy loss: -0.0013  | Total Loss: 2.48 | Total Steps: 16
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episod

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16893/200000  | Episode Reward: -2.0  | Average Reward 5.29  | Actor loss: -0.26 | Critic loss: 5.84 | Entropy loss: -0.0079  | Total Loss: 5.57 | Total Steps: 138
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16894/200000  | Episode Reward: 7.0  | Average Reward 5.30  | Actor loss: -0.03 | Critic loss: 1.18 | Entropy loss: -0.0023  | Total Loss: 1.15 | Total Steps: 129
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16895/200000  | Episode Reward: 7.0  | Average Reward 5.33  | Actor loss: -0.39 | Critic loss: 2.63 | Entropy loss: -0.0121  | Total Loss: 2.23 | Total Steps: 151
--- target colour: green, target object: pris

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16919/200000  | Episode Reward: 10.0  | Average Reward 5.54  | Actor loss: 0.12 | Critic loss: 1.41 | Entropy loss: -0.0011  | Total Loss: 1.52 | Total Steps: 17
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16920/200000  | Episode Reward: -5.0  | Average Reward 5.51  | Actor loss: -0.46 | Critic loss: 4.91 | Entropy loss: -0.0215  | Total Loss: 4.42 | Total Steps: 185
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16921/200000  | Episode Reward: 7.0  | Average Reward 5.50  | Actor loss: -0.06 | Critic loss: 1.73 | Entropy loss: -0.0125  | Total Loss: 1.66 | Total Steps: 207
--- target colour: blue, target object: c

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16943/200000  | Episode Reward: 10.0  | Average Reward 5.23  | Actor loss: 0.51 | Critic loss: 1.89 | Entropy loss: -0.0028  | Total Loss: 2.41 | Total Steps: 18
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16944/200000  | Episode Reward: 10.0  | Average Reward 5.23  | Actor loss: 0.55 | Critic loss: 5.48 | Entropy loss: -0.0026  | Total Loss: 6.03 | Total Steps: 23
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16945/200000  | Episode Reward: 6.0  | Average Reward 5.21  | Actor loss: -0.28 | Critic loss: 1.80 | Entropy loss: -0.0253  | Total Loss: 1.50 | Total Steps: 200
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16946/200000  | Episode Reward: 10.0  | Average

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16970/200000  | Episode Reward: 1.0  | Average Reward 5.38  | Actor loss: -0.40 | Critic loss: 8.62 | Entropy loss: -0.0090  | Total Loss: 8.21 | Total Steps: 103
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16971/200000  | Episode Reward: 3.0  | Average Reward 5.34  | Actor loss: -0.46 | Critic loss: 6.53 | Entropy loss: -0.0212  | Total Loss: 6.05 | Total Steps: 125
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16972/200000  | Episode Reward: 10.0  | Average Reward 5.38  | Actor loss: 0.08 | Critic loss: 0.97 | Entropy loss: -0.0016  | Total Loss: 1.05 | Total Steps: 18
--- target colour: yellow, target object: cube ---
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16996/200000  | Episode Reward: 1.0  | Average Reward 5.93  | Actor loss: -0.34 | Critic loss: 2.70 | Entropy loss: -0.0206  | Total Loss: 2.34 | Total Steps: 154
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16997/200000  | Episode Reward: 4.0  | Average Reward 5.92  | Actor loss: -0.44 | Critic loss: 6.73 | Entropy loss: -0.0221  | Total Loss: 6.27 | Total Steps: 113
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 16998/200000  | Episode Reward: 1.0  | Average Reward 5.90  | Actor loss: -0.24 | Critic loss: 7.01 | Entropy loss: -0.0058  | Total Loss: 6.76 | Total Steps: 147
--- target colour: red, target object: cy

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17021/200000  | Episode Reward: 7.0  | Average Reward 5.75  | Actor loss: -0.29 | Critic loss: 2.95 | Entropy loss: -0.0152  | Total Loss: 2.65 | Total Steps: 162
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17022/200000  | Episode Reward: 7.0  | Average Reward 5.74  | Actor loss: -0.05 | Critic loss: 2.18 | Entropy loss: -0.0109  | Total Loss: 2.12 | Total Steps: 152
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17023/200000  | Episode Reward: 10.0  | Average Reward 5.79  | Actor loss: 0.40 | Critic loss: 2.69 | Entropy loss: -0.0016  | Total Loss: 3.09 | Total Steps: 17
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17024/200000  | Episode 

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17047/200000  | Episode Reward: 10.0  | Average Reward 5.90  | Actor loss: -0.23 | Critic loss: 1.63 | Entropy loss: -0.0154  | Total Loss: 1.39 | Total Steps: 184
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 17048/200000  | Episode Reward: -11.0  | Average Reward 5.79  | Actor loss: -0.67 | Critic loss: 11.15 | Entropy loss: -0.0205  | Total Loss: 10.47 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17049/200000  | Episode Reward: 10.0  | Average Reward 5.79  | Actor loss: 0.03 | Critic loss: 1.57 | Entropy loss: -0.0003  | Total Loss: 1.60 | Total Steps: 17
---

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17073/200000  | Episode Reward: -5.0  | Average Reward 5.62  | Actor loss: -0.42 | Critic loss: 7.59 | Entropy loss: -0.0143  | Total Loss: 7.16 | Total Steps: 176
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17074/200000  | Episode Reward: -5.0  | Average Reward 5.54  | Actor loss: -0.33 | Critic loss: 4.02 | Entropy loss: -0.0105  | Total Loss: 3.67 | Total Steps: 148
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 170

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17099/200000  | Episode Reward: 10.0  | Average Reward 5.73  | Actor loss: 0.05 | Critic loss: 2.35 | Entropy loss: -0.0032  | Total Loss: 2.39 | Total Steps: 95
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17100/200000  | Episode Reward: 10.0  | Average Reward 5.76  | Actor loss: -0.21 | Critic loss: 2.16 | Entropy loss: -0.0027  | Total Loss: 1.95 | Total Steps: 89
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17101/200000  | Episode Reward: 10.0  | Average Reward 5.82  | Actor loss: 0.30 | Critic loss: 1.43 | Entropy loss: -0.0022  | Total Loss: 1.74 | Total Steps: 18
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17102/200000  | Episode Reward: 10.0  | Average Reward 5.86  | Actor loss

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17126/200000  | Episode Reward: 10.0  | Average Reward 6.08  | Actor loss: 0.05 | Critic loss: 1.16 | Entropy loss: -0.0016  | Total Loss: 1.20 | Total Steps: 91
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17127/200000  | Episode Reward: 10.0  | Average Reward 6.13  | Actor loss: 0.27 | Critic loss: 4.65 | Entropy loss: -0.0120  | Total Loss: 4.90 | Total Steps: 100
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17128/200000  | Episode Reward: 4.0  | Average Reward 6.13  | Actor loss: -0.11 | Critic loss: 1.83 | Entropy loss: -0.0253  | Total Loss: 1.69 | Total Steps: 200
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17129/200000  | E

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17151/200000  | Episode Reward: 1.0  | Average Reward 6.03  | Actor loss: -0.20 | Critic loss: 6.13 | Entropy loss: -0.0088  | Total Loss: 5.92 | Total Steps: 110
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17152/200000  | Episode Reward: 7.0  | Average Reward 6.01  | Actor loss: -0.24 | Critic loss: 3.23 | Entropy loss: -0.0144  | Total Loss: 2.98 | Total Steps: 134
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17153/200000  | Episode Reward: 4.0  | Average Reward 6.01  | Actor loss: -0.24 | Critic loss: 3.41 | Entropy loss: -0.0126  | Total Loss: 3.15 | Total Steps: 145
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Epi

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17178/200000  | Episode Reward: 4.0  | Average Reward 6.21  | Actor loss: -0.28 | Critic loss: 5.02 | Entropy loss: -0.0076  | Total Loss: 4.73 | Total Steps: 171
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17179/200000  | Episode Reward: -17.0  | Average Reward 6.09  | Actor loss: -0.33 | Critic loss: 8.72 | Entropy loss: -0.0080  | Total Loss: 8.38 | Total Steps: 388
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | E

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17204/200000  | Episode Reward: 10.0  | Average Reward 6.12  | Actor loss: -0.20 | Critic loss: 4.24 | Entropy loss: -0.0083  | Total Loss: 4.03 | Total Steps: 106
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17205/200000  | Episode Reward: -2.0  | Average Reward 6.08  | Actor loss: -0.49 | Critic loss: 3.35 | Entropy loss: -0.0296  | Total Loss: 2.83 | Total Steps: 203
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17206/200000  | Episode Reward: 4.0  | Average Reward 6.05  | Actor loss: -0.25 | Critic loss: 1.98 | Entropy loss: -0.0249  | Total Loss: 1.70 | Total Steps: 126
--- target colour: yellow, target obj

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17230/200000  | Episode Reward: 10.0  | Average Reward 6.31  | Actor loss: 0.20 | Critic loss: 1.70 | Entropy loss: -0.0012  | Total Loss: 1.90 | Total Steps: 16
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17231/200000  | Episode Reward: 10.0  | Average Reward 6.31  | Actor loss: 0.30 | Critic loss: 1.39 | Entropy loss: -0.0017  | Total Loss: 1.69 | Total Steps: 18
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17232/200000  | Episode Reward: 1.0  | Average Reward 6.26  | Actor loss: -0.40 | Critic loss: 4.48 | Entropy loss: -0.0175  | Total Loss: 4.07 | Total Steps: 167
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17256/200000  | Episode Reward: 4.0  | Average Reward 6.22  | Actor loss: -0.21 | Critic loss: 4.82 | Entropy loss: -0.0133  | Total Loss: 4.60 | Total Steps: 239
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17257/200000  | Episode Reward: 4.0  | Average Reward 6.20  | Actor loss: -0.24 | Critic loss: 1.73 | Entropy loss: -0.0124  | Total Loss: 1.48 | Total Steps: 104
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17258/200000  | Episode Reward: 7.0  | Average Reward 6.20  | Actor loss: 0.07 | Critic loss: 4.54 | Entropy loss: -0.0050  | Total Loss: 4.61 | Total Steps: 93
--- target colour: blue, target object: capsule ---
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17282/200000  | Episode Reward: 4.0  | Average Reward 6.25  | Actor loss: -0.24 | Critic loss: 8.17 | Entropy loss: -0.0103  | Total Loss: 7.91 | Total Steps: 108
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17283/200000  | Episode Reward: 10.0  | Average Reward 6.25  | Actor loss: 0.28 | Critic loss: 3.88 | Entropy loss: -0.0123  | Total Loss: 4.15 | Total Steps: 104
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17284/200000  | Episode Reward: 4.0  | Average Reward 6.21  | Actor loss: -0.34 | Critic loss: 4.74 | Entropy loss: -0.0074  | Total Loss: 4.39 | Total Steps: 167
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17308/200000  | Episode Reward: 1.0  | Average Reward 5.97  | Actor loss: -0.36 | Critic loss: 6.25 | Entropy loss: -0.0164  | Total Loss: 5.88 | Total Steps: 177
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17309/200000  | Episode Reward: 7.0  | Average Reward 5.96  | Actor loss: -0.44 | Critic loss: 4.70 | Entropy loss: -0.0091  | Total Loss: 4.25 | Total Steps: 94
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17310/200000  | Episode Reward: 7.0  | Average Reward 5.94  | Actor loss: -0.08 | Critic loss: 2.58 | Entropy loss: -0.0029  | Total Loss: 2.50 | Total Steps: 91
--- target colour: black, target object: prism ---
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17335/200000  | Episode Reward: 10.0  | Average Reward 6.17  | Actor loss: -0.28 | Critic loss: 4.67 | Entropy loss: -0.0112  | Total Loss: 4.38 | Total Steps: 100
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17336/200000  | Episode Reward: 7.0  | Average Reward 6.20  | Actor loss: -0.21 | Critic loss: 1.71 | Entropy loss: -0.0218  | Total Loss: 1.48 | Total Steps: 240
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17337/200000  | Episode Reward: 1.0  | Average Reward 6.22  | Actor loss: -0.12 | Critic loss: 1.98 | Entropy loss: -0.0288  | Total Loss: 1.83 | Total Steps: 180
--- target colour: red, target object: prism ---
Decision Step reward: -3
Step: 250
Decisio

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17361/200000  | Episode Reward: 1.0  | Average Reward 6.12  | Actor loss: -0.35 | Critic loss: 2.86 | Entropy loss: -0.0207  | Total Loss: 2.49 | Total Steps: 166
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17362/200000  | Episode Reward: 1.0  | Average Reward 6.12  | Actor loss: -0.67 | Critic loss: 5.18 | Entropy loss: -0.0212  | Total Loss: 4.49 | Total Steps: 165
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17363/200000  | Episode Reward: 10.0  | Average Reward 6.12  | Actor loss: 0.25 | Critic loss: 1.38 | Entropy loss: -0.0075  | Total Loss: 1.62 | Total Steps: 93
--- target colour: yellow, target object: cy

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17386/200000  | Episode Reward: -2.0  | Average Reward 6.06  | Actor loss: -0.31 | Critic loss: 4.41 | Entropy loss: -0.0090  | Total Loss: 4.10 | Total Steps: 171
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17387/200000  | Episode Reward: 7.0  | Average Reward 6.04  | Actor loss: 0.08 | Critic loss: 3.11 | Entropy loss: -0.0246  | Total Loss: 3.17 | Total Steps: 129
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17388/200000  | Episode Reward: -8.0  | Average Reward 5.96  | Actor loss: 0.40 | Critic loss: 2.57 | Entropy loss: -0.0122  | Total Loss: 2.96 | Total Steps: 345
--- ta

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17413/200000  | Episode Reward: 7.0  | Average Reward 5.99  | Actor loss: -0.04 | Critic loss: 1.21 | Entropy loss: -0.0097  | Total Loss: 1.16 | Total Steps: 115
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17414/200000  | Episode Reward: 7.0  | Average Reward 5.99  | Actor loss: -0.30 | Critic loss: 3.31 | Entropy loss: -0.0089  | Total Loss: 3.00 | Total Steps: 152
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17415/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor loss: 0.01 | Critic loss: 3.09 | Entropy loss: -0.0050  | Total Loss: 3.10 | Total Steps: 96
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17440/200000  | Episode Reward: 7.0  | Average Reward 6.10  | Actor loss: -0.32 | Critic loss: 7.87 | Entropy loss: -0.0057  | Total Loss: 7.54 | Total Steps: 91
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17441/200000  | Episode Reward: 3.0  | Average Reward 6.07  | Actor loss: -0.93 | Critic loss: 6.06 | Entropy loss: -0.0216  | Total Loss: 5.11 | Total Steps: 115
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17442/200000  | Episode Reward: 7.0  | Average Reward 6.08  | Actor loss: -0.32 | Critic loss: 5.17 | Entropy loss: -0.0132  | Total Loss: 4.83 | Total Steps: 173
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17465/200000  | Episode Reward: 10.0  | Average Reward 5.97  | Actor loss: -0.09 | Critic loss: 0.89 | Entropy loss: -0.0019  | Total Loss: 0.80 | Total Steps: 19
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17466/200000  | Episode Reward: 10.0  | Average Reward 5.97  | Actor loss: 0.32 | Critic loss: 1.23 | Entropy loss: -0.0219  | Total Loss: 1.53 | Total Steps: 130
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17467/200000  | Episode Reward: 10.0  | Average Reward 6.00  | Actor loss: -0.27 | Critic loss: 3.45 | Entropy loss: -0.0225  | Total Loss: 3.16 | Total Steps: 148
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17468/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17492/200000  | Episode Reward: 4.0  | Average Reward 5.99  | Actor loss: 0.02 | Critic loss: 3.10 | Entropy loss: -0.0107  | Total Loss: 3.11 | Total Steps: 108
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17493/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor loss: 0.18 | Critic loss: 2.06 | Entropy loss: -0.0067  | Total Loss: 2.23 | Total Steps: 93
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17494/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor loss: 0.04 | Critic loss: 3.98 | Entropy loss: -0.0040  | Total Loss: 4.02 | Total Steps: 97
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps


Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17518/200000  | Episode Reward: 4.0  | Average Reward 6.01  | Actor loss: -0.12 | Critic loss: 1.29 | Entropy loss: -0.0101  | Total Loss: 1.16 | Total Steps: 144
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17519/200000  | Episode Reward: 10.0  | Average Reward 6.01  | Actor loss: 0.06 | Critic loss: 3.83 | Entropy loss: -0.0059  | Total Loss: 3.88 | Total Steps: 105
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17520/200000  | Episode Reward: 10.0  | Average Reward 6.03  | Actor loss: 0.23 | Critic loss: 4.38 | Entropy loss: -0.0017  | Total Loss: 4.61 | Total Steps: 19
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17544/200000  | Episode Reward: 10.0  | Average Reward 6.00  | Actor loss: 0.30 | Critic loss: 2.08 | Entropy loss: -0.0154  | Total Loss: 2.36 | Total Steps: 100
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17545/200000  | Episode Reward: 10.0  | Average Reward 6.00  | Actor loss: 0.02 | Critic loss: 2.29 | Entropy loss: -0.0066  | Total Loss: 2.30 | Total Steps: 109
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17546/200000  | Episode Reward: 10.0  | Average Reward 6.04  | Actor loss: -0.09 | Critic loss: 0.79 | Entropy loss: -0.0090  | Total Loss: 0.70 | Total Steps: 142
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17547/200000  | Episode Reward: 10.0  | Average Reward 6.05  | Actor lo

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17570/200000  | Episode Reward: 10.0  | Average Reward 6.00  | Actor loss: 0.16 | Critic loss: 2.38 | Entropy loss: -0.0014  | Total Loss: 2.53 | Total Steps: 17
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17571/200000  | Episode Reward: 1.0  | Average Reward 5.95  | Actor loss: -0.27 | Critic loss: 6.07 | Entropy loss: -0.0296  | Total Loss: 5.78 | Total Steps: 184
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17572/200000  | Episode Reward: -2.0  | Average Reward 5.92  | Actor loss: -0.44 | Critic loss: 6.36 | Entropy loss: -0.0105  | Total Loss: 5.90 | Total Steps: 143
--- target co

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17595/200000  | Episode Reward: -2.0  | Average Reward 5.87  | Actor loss: -0.59 | Critic loss: 3.07 | Entropy loss: -0.0258  | Total Loss: 2.46 | Total Steps: 182
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17596/200000  | Episode Reward: 10.0  | Average Reward 5.87  | Actor loss: 0.24 | Critic loss: 3.51 | Entropy loss: -0.0011  | Total Loss: 3.75 | Total Steps: 17
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17597/200000  | Episode Reward: 10.0  | Average Reward 5.92  | Actor loss: 0.17 | Critic loss: 1.29 | Entropy loss: -0.0040  | Total Loss: 1.46 | Total Steps: 90
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in termina

Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17621/200000  | Episode Reward: 10.0  | Average Reward 5.89  | Actor loss: 0.10 | Critic loss: 0.97 | Entropy loss: -0.0011  | Total Loss: 1.06 | Total Steps: 16
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17622/200000  | Episode Reward: 10.0  | Average Reward 5.89  | Actor loss: 0.24 | Critic loss: 5.76 | Entropy loss: -0.0013  | Total Loss: 6.00 | Total Steps: 24
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17623/200000  | Episode Reward: 4.0  | Average Reward 5.86  | Actor loss: -0.26 | Critic loss: 4.97 | Entropy loss: -0.0098  | Total Loss: 4.70 | Total Steps: 139
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17645/200000  | Episode Reward: 7.0  | Average Reward 5.58  | Actor loss: 0.00 | Critic loss: 1.99 | Entropy loss: -0.0074  | Total Loss: 1.99 | Total Steps: 157
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17646/200000  | Episode Reward: -5.0  | Average Reward 5.55  | Actor loss: 0.68 | Critic loss: 4.66 | Entropy loss: -0.0030  | Total Loss: 5.33 | Total Steps: 280
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Training  | Episode: 17647/200000  | Episode Reward: -2.0  | Average Reward 5.55  | Actor loss: -0.20 | Critic loss: 6

KeyboardInterrupt: 

In [6]:
# Run this cell after interrupting training early: it writes the metric
# histories collected so far to a JSON-formatted text file.
data = {
    'all_average_reward': all_average_reward,
    'all_episode_reward': all_episode_reward,
    'all_actor_loss': all_actor_loss,
    'all_critic_loss': all_critic_loss,
    'all_entropy_loss': all_entropy_loss,
    'all_total_loss': all_total_loss,
    'all_steps': all_steps,
}
file_path = f'result/{ALG_NAME}_{ENV_ID}.txt'
# Create the output directory on demand; otherwise open() raises
# FileNotFoundError and the metrics of the interrupted run cannot be saved.
# (`os` is imported in the model cell of this notebook.)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, 'w') as file:
    json.dump(data, file)

# Test

In [None]:
from torch.distributions import Categorical

device = torch.device("cpu")

# Evaluation settings.
episode = 9190        # training episode whose checkpoint is loaded below
speed = 1             # multiplier applied to the one-hot action vector
MAX_STEPS = 500       # an episode is aborted (reward -10) after this many steps
TEST_EPISODES = 100
ALG_NAME = 'S0_with'
ENV_ID = '3'
tracked_agent = -1    # -1 means "no agent id picked yet"

env.reset()
agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim, num_actions)
agent.load(episode, ALG_NAME, ENV_ID)
average = 0

# A separate loop variable is used so the checkpoint id stored in `episode`
# is not overwritten by the evaluation loop.
for test_episode in range(TEST_EPISODES):
    episode_reward = 0
    # env.reset()
    behavior_name = list(env.behavior_specs)[0]
    STEPS = 0
    decision_steps, terminal_steps = env.get_steps(behavior_name)

    # state -- vt (image), lt (instruction), lstm hidden state
    vt = torch.tensor(decision_steps.obs[0]).reshape(1, 3, 128, 128).to(device)  # image observation
    object_index = int(decision_steps.obs[1][0][0])
    colour_index = int(decision_steps.obs[1][0][1])
    print(f'--- target colour: {colour_hashmap[colour_index]}, target object: {object_hashmap[object_index]} ---')
    # objects: 0-capsule,1-cube,2-cylinder,3-prism,4-sphere
    # colours: 0-red,1-green,2-blue,3-yellow,4-black

    # Language vector: one slot set for the object (index 0-4) and one for the
    # colour (offset by 5). Sized from num_words instead of a hard-coded 35.
    lt = torch.zeros(num_words).to(device)
    lt[object_index], lt[colour_index + 5] = 1, 1

    lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
    done = False
    while not done:
        STEPS += 1
        # Reset every step so that a step in which the tracked agent shows up
        # in neither step set cannot reuse a stale (or undefined) reward.
        reward = 0

        # Pure inference: no autograd graph is needed, so nothing has to be
        # detached afterwards (this covers the recurrent state as well).
        with torch.no_grad():
            policy_dist, value, lstm_hidden_state = agent(vt, lt, lstm_hidden_state)
            action_dist = Categorical(F.softmax(policy_dist, dim=1))
            action = action_dist.sample()  # sample an action from action_dist

        # `action` is already a tensor; wrapping it in torch.tensor() again
        # only triggers a copy-construct warning on every step.
        action_onehot = F.one_hot(action, num_actions).cpu()

        discrete_actions = np.array(action_onehot).reshape(1, 4) * speed
        action_tuple = ActionTuple()
        action_tuple.add_discrete(discrete_actions)
        env.set_actions(behavior_name, action_tuple)
        env.step()
        decision_steps, terminal_steps = env.get_steps(behavior_name)

        if tracked_agent == -1 and len(decision_steps) >= 1:
            tracked_agent = decision_steps.agent_id[0]

        if tracked_agent in terminal_steps:  # roll over or hit the target
            print('Agent in terminal steps')
            done = True
            reward = terminal_steps[tracked_agent].reward
            if not reward > 0:
                reward = -1  # roll over or other unseen conditions

            print(f'Terminal Step reward: {reward}')

        elif tracked_agent in decision_steps:  # the agent which requires action
            reward = decision_steps[tracked_agent].reward

        # Apply the time-out penalty only if the episode has not already ended
        # on this very step; otherwise a real terminal reward would be
        # replaced by -10 and the environment would be reset a second time.
        if not done and STEPS >= MAX_STEPS:
            reward = -10
            print(f'Max Step Reward: {reward}')
            env.reset()
            done = True

        episode_reward = episode_reward + reward
        vt = torch.tensor(decision_steps.obs[0]).reshape(1, 3, 128, 128).to(device)

    average += episode_reward / TEST_EPISODES
    print(f'Episode: {test_episode}, Episode reward: {episode_reward}')
print(f'Average Episode Reward: {average}')


# Random Action

In [None]:
# Random-policy baseline used to pick a sensible MAX_STEP: count how many
# uniformly random actions are needed before the target is hit.
# Recorded averages:
# speed 1: 10000+ speed 2:1822 speed 3: 1918 speed 4: 900 speed 5: 951 speed 6:964 speed 7: 1181
import random
import torch
import torch.nn.functional as F

TRAIN_EPISODES = 20
tracked_agent = -1
num_actions = 4
average = 0
# To sweep several speeds, wrap the loop below in
# `for speed in range(30,70,5): speed = speed/10`.
speed = 3

for episode in range(TRAIN_EPISODES):
    env.reset()
    behavior_name = list(env.behavior_specs)[0]
    step = 0
    while True:
        # Draw a uniformly random action and send it as a scaled one-hot
        # vector: [forward, backward, right, left].
        action_index = random.randint(0, 3)
        one_hot_action = F.one_hot(torch.tensor(action_index), num_actions).cpu()
        discrete_actions = np.array(one_hot_action).reshape(1, 4) * speed
        action_tuple = ActionTuple()
        action_tuple.add_discrete(discrete_actions)
        env.set_actions(behavior_name, action_tuple)
        env.step()
        step += 1

        decision_steps, terminal_steps = env.get_steps(behavior_name)
        if tracked_agent == -1 and len(decision_steps) >= 1:
            tracked_agent = decision_steps.agent_id[0]

        # Nothing to evaluate until the tracked agent reaches a terminal step.
        if tracked_agent not in terminal_steps:
            continue

        reward = terminal_steps[tracked_agent].reward
        if reward > 0:
            # Target hit: record the step count and move on to the next episode.
            print(f'{episode}: {step} in total')
            average += step
            break

        # Roll over or other unseen conditions: restart this attempt from zero.
        env.reset()
        step = 0

average = average / TRAIN_EPISODES
print(f'For speed {speed}, average random step for hitting the target is {average}')
      

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Sample data: max_steps[i][j] is the value for object_sizes[i] at speeds[j].
object_sizes = [1, 2, 3, 4, 5]  # Object sizes on the x-axis
max_steps = [[4721, 2468, 1587, 1249, 1084],   # Max steps for each object size
             [4027, 1550, 738, 640, 505],
             [3320, 1391, 464, 331, 307],
             [3289, 882, 679, 424, 211],
             [3057, 1203, 470, 378, 222]]
speeds = [1, 2, 3, 4, 5]  # Discrete speeds for color-coding

# Color mapping for each speed
speed_color_mapping = {
    1: 'red',
    2: 'blue',
    3: 'green',
    4: 'orange',
    5: 'purple'
}

fig, ax = plt.subplots()

# Draw one labelled scatter series per speed. Each legend entry is then tied
# to its own artist instead of relying on the creation order of 25 separate
# scatter calls, and the loop no longer overwrites the notebook-level `speed`
# variable used by the action cells.
for speed_index, speed_value in enumerate(speeds):
    steps_for_speed = [steps_per_size[speed_index] for steps_per_size in max_steps]
    ax.scatter(
        object_sizes,
        steps_for_speed,
        color=speed_color_mapping[speed_value],
        label=f'Speed {speed_value}',
    )

# Set labels and title
ax.set_xlabel('Object Size')
ax.set_ylabel('Max Step')
ax.set_title('Scatter Plot')

ax.set_xlim(0, 6)
ax.set_xticks(np.arange(0, 7, 1))
ax.set_ylim(0, 5000)
ax.set_yticks(np.arange(0, 5001, 200))

# Create legend from the per-series labels
ax.legend(loc='upper right')

# Display the plot
plt.show()


