In [1]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device) 

cuda


# Imports and Initialise Env

In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
import numpy as np
from mlagents_envs.environment import ActionTuple

In [2]:
file_name = "C:\\Users\\Palaash.HPZ\\Desktop\\RL-concept-learning_large_build_envs\\build_envs\\windows\\S2 180723\\build"

env =  UE(file_name=file_name,seed=1,side_channels=[],worker_id=1,no_graphics = False)
env.reset()

behavior_names = list(env.behavior_specs.keys())
behavior_value = list(env.behavior_specs.values())
DecisionSteps, TerminalSteps = env.get_steps(behavior_names[0])
agentsNum = len(DecisionSteps.agent_id)

In [13]:
# env.close()

#  Model

In [3]:
#
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

vision_output_dim = 3136
num_words = 35  # Number of unique words in the vocabulary
language_output_dim = 128
embedding_dim = 128
mixing_dim = 256
lstm_hidden_dim = 256
num_actions = 4

# (3,128,128) --> (64,7,7) = 3136 (3-layer CNN)
class VisualModule(nn.Module): 
    def __init__(self):
        super(VisualModule, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=3, padding=0),
            nn.ReLU()
        )
        # self.conv = nn.Sequential(
        #     nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(128, 64, kernel_size=5, stride=2, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        # )

    def forward(self, vt):
        encoded_vt = self.conv(vt)
        return encoded_vt.view(vt.size(0), -1).squeeze()

# one-hot encoding [0 0 1 0 0] --> 128 dimensional embedding (FF)
# S1:5 S2:5 S3:11 S4:9 --> 30 + 5 (noun) = 35 in total
class LanguageModule(nn.Module): 
    def __init__(self, num_words, embedding_dim):
        super(LanguageModule, self).__init__()
        self.embedding = nn.Linear(num_words, embedding_dim)

    def forward(self, lt):
        embedded_lt = self.embedding(lt)
        return embedded_lt

# 3136(vision) + 128 (language) --> 256 dimensional embedding (FF)
class MixingModule(nn.Module):
    def __init__(self, vision_output_dim, language_output_dim, mixing_dim):
        super(MixingModule, self).__init__()
        self.linear = nn.Linear(vision_output_dim + language_output_dim, mixing_dim)

    def forward(self, vision_output, language_output):
        combined_output = torch.cat((vision_output, language_output), dim=0)
        mixed_output = self.linear(combined_output)
        return mixed_output

class LSTMModule(nn.Module):
    def __init__(self,mixing_dim,lstm_hidden_dim):
        super(LSTMModule, self).__init__()
        self.lstm = nn.LSTMCell(mixing_dim, lstm_hidden_dim)
    
    def forward(self,mixed_output,lstm_hidden_state):
        lstm_hidden_state = self.lstm(mixed_output, lstm_hidden_state) 
        # lstm_output = lstm_hidden_state[0] # output is (hidden_state,cell_state), we need hidden state, shape (1,256)
        return lstm_hidden_state

class Agent(nn.Module):
    def __init__(self, num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions):
        super(Agent, self).__init__()
        self.language_module = LanguageModule(num_words, embedding_dim)
        self.visual_module = VisualModule()
        self.mixing_module = MixingModule(vision_output_dim, language_output_dim, mixing_dim)
        self.lstm_module = LSTMModule(mixing_dim, lstm_hidden_dim)
        self.action_predictor = nn.Linear(lstm_hidden_dim, num_actions)
        self.value_estimator = nn.Linear(lstm_hidden_dim, 1)

    def forward(self, vt, lt, lstm_hidden_state):
        vision_output = self.visual_module(vt)
        language_output = self.language_module(lt)
        mixed_output = self.mixing_module(vision_output, language_output).unsqueeze(0)
        lstm_output = self.lstm_module(mixed_output,lstm_hidden_state)
        action_probs = self.action_predictor(lstm_output[0]) 
        value_estimate = self.value_estimator(lstm_output[0])
        return action_probs,value_estimate,lstm_output
        
        
    def save(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self.state_dict(), os.path.join(path, f'agent_{episode}.pt'))

    def load(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
        self.load_state_dict(torch.load(os.path.join(path, f'agent_{episode}.pt')))   
        
#     def load(self,path):
#         self.load_state_dict(torch.load(path))

# Train

1: with skip steps: set speed=1, lr = 3.5e-5  #LR
2: without skip steps: set speed=3, lr = 3.5e-5  #LR
3: lr = 5e-5  #LR
4: lr = 10e-5  #LR

increase LR will learn faster but training may collapse if too fast

reward structure

hit target: +10
hit wall: -1
hit wrong target: -3
max steps reached: -10

s1a 100,000 20 hrs

In [None]:
# 
import argparse
import time
import json
# import matplotlib.pyplot as plt
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
# add arguments in command --train/test
# parser = argparse.ArgumentParser(description='Train or test neural net motor controller.')
# parser.add_argument('--train', dest='train', action='store_true', default=False)
# parser.add_argument('--test', dest='test', action='store_true', default=True)
# args = parser.parse_args()

train = True
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device) 

# load from S0 - these are the S0 best model details
S0_ALG_NAME = 'S0'
S0_ENV_ID = '3'
S0_episode = 7593

ALG_NAME = 'S2'
ENV_ID = '1'
TRAIN_EPISODES = 200000  # number of overall episodes for training
MAX_STEPS = 500  # maximum time step in one episode
LAM = 0.95  # reward discount in TD error
# env_per_iteration = 100
lr = 3.5e-5  #LR
speed = 3
num_steps = 250 # the step for updating the network
max_step_reward = -10

if __name__ == '__main__':
    agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
    agent.load(S0_episode,S0_ALG_NAME,S0_ENV_ID)
    agent.to(device)
    optimizer = optim.RMSprop(agent.parameters(), lr=lr)
    best_score = float('-inf') #this is -infinity
    
    object_hashmap = {
    0: 'capsule',
    1: 'cube',
    2: 'cylinder',
    3: 'prism',
    4: 'sphere'}
    colour_hashmap = {
    0: 'red',
    1: 'green',
    2: 'blue',
    3: 'yellow',
    4: 'black'}
    
    if train:
        entropy_term = 0
        all_episode_reward = []
        all_average_reward = []
        all_steps = []
        all_actor_loss = []
        all_critic_loss = []
        all_entropy_loss = []
        all_total_loss = []
        tracked_agent = -1
        
        for episode in range(TRAIN_EPISODES):
            t0 = time.time()
            episode_reward = 0
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            STEPS = 0
            decision_steps, terminal_steps = env.get_steps(behavior_name)
            # state -- vt, lt, lstm
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device) #vector image
            object_index = int(decision_steps.obs[1][0][0])
            colour_index = int(decision_steps.obs[1][0][1])
            print(f'--- target colour: {colour_hashmap[colour_index]}, target object: {object_hashmap[object_index]} ---')
            # objects: 0-capsule,1-cube,2-cylinder,3-prism,4-sphere 
            # colours: 0-red,1-green,2-blue,3-yellow,4-black 
            
#             lt = torch.eye(num_words)[:, index].to(device) #one hot encoder language vector
            lt = torch.zeros(35).to(device)
            lt[object_index],lt[colour_index+5] = 1,1 #one hot encoder language vector
            
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            
            done = False
            while not done: #training loop
                
                # Need to use when calculating the loss
                log_probs = []
                values = torch.empty(0).to(device)
                rewards = []

                for steps in range(num_steps):
#                     if STEPS % 5 == 0: #this is the skip steps
#                         lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
#                         policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
#                     STEPS += 1
                    lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                    policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                    STEPS += 1
                    dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy() #use softmax to get prob dist

                    action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1)) #put prob dist into Categorical class to sample
                    action = action_dist.sample() # sample an action from action_dist
                    action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
                    
                    log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                    # entropy = -np.sum(np.mean(dist)* np.log(dist))
                    entropy = F.cross_entropy(policy_dist.detach(), action)

                    discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                    action_tuple = ActionTuple()
                    action_tuple.add_discrete(discrete_actions)
                    env.set_actions(behavior_name,action_tuple)
                    
                    env.step()
                    decision_steps, terminal_steps = env.get_steps(behavior_name)

                    if tracked_agent == -1 and len(decision_steps) >= 1: #means agent still req action - eps not ended yet
                        tracked_agent = decision_steps.agent_id[0]

                    if tracked_agent in terminal_steps: # roll over or hit the target
                        print('Agent in terminal steps')
                        done = True
                        reward = terminal_steps[tracked_agent].reward
                        if reward > 0:
                            pass
                        else: reward = -1 # roll over or other unseen conditions
                        print(f'Terminal Step reward: {reward}')

                    elif tracked_agent in decision_steps: # the agent which requires action
                        reward = decision_steps[tracked_agent].reward
                        if reward<0:
                            if reward==-2.5:
                                reward=-3
                                print(f'Decision Step reward: {reward}')
                            # if reward<-1: hit = 1
                            
                    if STEPS >= MAX_STEPS:
                        reward = max_step_reward
                        print(f'Max Step Reward: {reward}')
                        env.reset()
                        done = True
                    if STEPS % num_steps == 0:
                        print (f'Step: {STEPS}')

                    episode_reward = episode_reward + reward

                    rewards.append(reward)
                    # values.append(value)
                    values = torch.cat((values, value), dim=0)
                    log_probs.append(log_prob)
                    entropy_term = entropy_term + entropy
                    vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                    vt = vt_new

                    if steps == num_steps-1:
                        break
                
                
                discounted_rewards = np.zeros_like(values.cpu().detach().numpy())
                cumulative = 0
                # print(len(rewards))
                for t in reversed(range(len(rewards))):
                    cumulative = rewards[t] + LAM * cumulative # Monte Carlo
                    discounted_rewards[t] = cumulative
                # print(f'rewards:{rewards}, discounted_rewards:{discounted_rewards}')
                # Advantage Actor Critic

                # Qvals[-1] = rewards[t] + LAM * Qval      or       Qvals[-1] = rewards[t]                   
                # for t in range(len(rewards)-1):
                #         Qvals[t] = rewards[t] + LAM * values[t+1]
                
                # r_(t+1) = R(s_t|a_t)--> reward[t]        a_t, V_t = agent(s_t)
                # A_t = r_(t+1) + LAM * V_(t+1) - V_t 
                #     = Q_t - V_t
                
                # Monte Carlo Advantage = reward + LAM * cumulative_reward
                # Actor_loss = -log(pai(s_t|a_t))*A_t
                # Critic_loss = A_t.pow(2) *0.5
                # Entropy_loss = -F.entropy(pai(St),index) * 0.001

                # entropy = -np.sum(np.mean(dist) * np.log(dist))
                
                #update actor critic
                
                # values = torch.FloatTensor(values).requires_grad_(True).to(device)
                discounted_rewards = torch.FloatTensor(discounted_rewards.astype(np.float32)).to(device)
                log_probs = torch.stack(log_probs)
                advantage = discounted_rewards - values
                actor_loss = (-log_probs * advantage).mean()
                critic_loss = 0.5 * torch.square(advantage).mean()
                entropy_term /= num_steps
                entropy_loss = -0.1 * entropy_term
                ac_loss = actor_loss + critic_loss + entropy_loss
                # ac_loss = values.mean()
                optimizer.zero_grad()
                ac_loss.backward()
                optimizer.step()
                # print('updated')
                # for name, param in agent.named_parameters():
                #     if param.grad is not None:
                #         print(name, param.grad)
                #     else:
                #         print(name, "gradients not computed")
                # for name, param in agent.named_parameters():
                #     if name == 'value_estimator.weight':
                #         print(name, param)
            
#                 if done: break


            all_episode_reward.append(float(episode_reward))
            all_steps.append(STEPS)
            all_actor_loss.append(float(actor_loss))
            all_critic_loss.append(float(critic_loss))
            all_entropy_loss.append(float(entropy_loss))
            all_total_loss.append(float(ac_loss))
            if episode >= 200:
                avg_score = np.mean(all_episode_reward[-200:])
                all_average_reward.append(avg_score)
                if avg_score > best_score:
                    best_score = avg_score
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print(f'-----The best score for averaging previous 200 episode reward is {best_score}. Model has been saved-----')
                print('Training  | Episode: {}/{}  | Episode Reward: {:.1f}  | Average Reward {:.2f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, avg_score, actor_loss, critic_loss,entropy_loss,  ac_loss, STEPS))
            else:  print('Training  | Episode: {}/{}  | Episode Reward: {:.1f}  | Actor loss: {:.2f} | Critic loss: {:.2f} | Entropy loss: {:.4f}  | Total Loss: {:.2f} | Total Steps: {}' \
                    .format(episode + 1, TRAIN_EPISODES, episode_reward, actor_loss, critic_loss, entropy_loss,  ac_loss, STEPS))
            if episode%5000 == 0:
                    agent.save(episode, ALG_NAME, ENV_ID)
                    print("Model has been saved")
        print(all_average_reward)
        agent.save(episode ,ALG_NAME, ENV_ID)
        print("Model has been saved")

        data = {
                    'all_average_reward': all_average_reward,
                    'all_episode_reward': all_episode_reward,
                    'all_actor_loss': all_actor_loss,
                    'all_critic_loss': all_critic_loss,
                    'all_entropy_loss': all_entropy_loss,
                    'all_total_loss': all_total_loss,
                    'all_steps': all_steps,
                } 
        file_path = f'result/{ALG_NAME}_{ENV_ID}.txt'
        with open(file_path, 'w') as file:
            json.dump(data, file)

cuda
--- target colour: blue, target object: prism ---




Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1/200000  | Episode Reward: -40.0  | Actor loss: -0.86 | Critic loss: 59.88 | Entropy loss: -0.0109  | Total Loss: 59.01 | Total Steps: 250
Model has been saved
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0


Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 11/200000  | Episode Reward: 49.0  | Actor loss: -0.29 | Critic loss: 9.17 | Entropy loss: -0.0266  | Total Loss: 8.86 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 21/200000  | Episode Reward: 46.0  | Actor loss: -0.16 | Critic loss: 7.37 | Entropy loss: -0.0213  | Total Loss: 7.19 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 31/200000  | Episode Reward: 42.0  | Actor loss: 0.10 | Critic loss: 6.26 | Entropy loss: -0.0209  | Total Loss: 6.34 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 41/200000  | Episode Reward: 50.0  | Actor loss: 0.02 | Critic loss: 6.86 | Entropy loss: -0.0245  | Total Loss: 6.85 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3


Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 51/200000  | Episode Reward: 27.0  | Actor loss: -0.50 | Critic loss: 14.67 | Entropy loss: -0.0306  | Total Loss: 14.13 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal step

Step: 250
Training  | Episode: 61/200000  | Episode Reward: 48.0  | Actor loss: 0.08 | Critic loss: 8.82 | Entropy loss: -0.0227  | Total Loss: 8.88 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 62/200000  | Episode Reward: 34.0  | Actor loss: -0.09 | Critic loss: 10.53 | Entropy loss

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 72/200000  | Episode Reward: 56.0  | Actor loss: 0.16 | Critic loss: 18.60 | Entropy loss: -0.0253  | Total Loss: 18.74 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 82/200000  | Episode Reward: 30.0  | Actor loss: -0.08 | Critic loss: 6.62 | Entropy loss: -0.0296  | Total Loss: 6.51 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 93/200000  | Episode Reward: 18.0  | Actor loss: -0.49 | Critic loss: 8.68 | Entropy loss: -0.0288  | Total Loss: 8.16 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
T

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 103/200000  | Episode Reward: 37.0  | Actor loss: -0.08 | Critic loss: 9.96 | Entropy loss: -0.0203  | Total Loss: 9.86 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 114/200000  | Episode Reward: 26.0  | Actor loss: -0.16 | Critic loss: 7.48 | Entropy loss: -0.0194  | Total Loss: 7.30 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 124/200000  | Episode Reward: 57.0  | Actor loss: 0.16 | Critic loss: 13.93 | Entropy loss: -0.0189  | Total Loss: 14.07 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3


Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 133/200000  | Episode Reward: 53.0  | Actor loss: 0.20 | Critic loss: 11.14 | Entropy loss: -0.0263  | Total Loss: 11.31 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 144/200000  | Episode Reward: 72.0  | Actor loss: 1.01 | Critic loss: 12.29 | Entropy loss: -0.0264  | Total Loss: 13.27 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step re

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 154/200000  | Episode Reward: 20.0  | Actor loss: -0.76 | Critic loss: 12.94 | Entropy loss: -0.0290  | Total Loss: 12.15 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 164/200000  | Episode Reward: 44.0  | Actor loss: 0.08 | Critic loss: 6.78 | Entropy loss: -0.0240  | Total Loss: 6.84 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 175/200000  | Episode Reward: 62.0  | Actor loss: 0.39 | Critic loss: 14.01 | Entropy loss: -0.0179  | Total Loss: 14.39 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 185/200000  | Episode Reward: 59.0  | Actor loss: 0.18 | Critic loss: 7.23 | Entropy loss: -0.0210  | Total Loss: 7.39 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 195/200000  | Episode Reward: 40.0  | Actor loss: -0.35 | Critic loss: 7.83 | Entropy loss: -0.0173  | Total Loss: 7.46 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 36.64. Model has been saved-----
Training  | Episode: 205/200000  | Episode Reward: 53.0  | Average Reward 36.64  | Actor loss: -0.01 | Critic loss: 8.23 | Entropy loss: -0.0168  | Total Loss: 8.21 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 215/200000  | Episode Reward: 33.0  | Average Reward 36.41  | Actor loss: -0.31 | Critic loss: 4.67 | Entropy loss: -0.0219  | Total Loss: 4.34 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 225/200000  | Episode Reward: 55.0  | Average Reward 36.49  | Actor loss: 0.04 | Critic loss: 6.52 | Entropy loss: -0.0131  | Total Loss: 6.55 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 235/200000  | Episode Reward: 19.0  | Average Reward 36.08  | Actor loss: -0.28 | Critic loss: 16.02 | Entropy loss: -0.0188  | Total Loss: 15.72 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 244/200000  | Episode Reward: 40.0  | Average Reward 36.59  | Actor loss: 0.05 | Critic loss: 7.92 | Entropy loss: -0.0180  | Total Loss: 7.95 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 37.155. Model has been saved-----
Training  | Episode: 253/200000  | Episode Reward: 46.0  | Average Reward 37.16  | Actor loss: -0.03 | Critic loss: 5.79 | Entropy loss: -0.0180  | Total Loss: 5.74 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 262/200000  | Episode Reward: 34.0  | Average Reward 37.47  | Actor loss: -0.26 | Critic loss: 7.26 | Entropy loss: -0.0165  | Total Loss: 6.99 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 271/200000  | Episode Reward: 47.0  | Average Reward 38.15  | Actor loss: -0.07 | Critic loss: 9.34 | Entropy loss: -0.0147  | Total Loss: 9.26 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 39.36. Model has been saved-----
Training  | Episode: 280/200000  | Episode Reward: 45.0  | Average Reward 39.36  | Actor loss: 0.02 | Critic loss: 8.78 | Entropy loss: -0.0094  | Total Loss: 8.80 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Te

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 40.31. Model has been saved-----
Training  | Episode: 289/200000  | Episode Reward: 53.0  | Average Reward 40.31  | Actor loss: -0.07 | Critic loss: 8.92 | Entropy loss: -0.0125  | Total Loss: 8.84 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 298/200000  | Episode Reward: 20.0  | Average Reward 40.62  | Actor loss: -0.55 | Critic loss: 8.84 | Entropy loss: -0.0165  | Total Loss: 8.27 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 307/200000  | Episode Reward: 40.0  | Average Reward 40.96  | Actor loss: -0.50 | Critic loss: 6.39 | Entropy loss: -0.0179  | Total Loss: 5.87 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 316/200000  | Episode Reward: 12.0  | Average Reward 41.23  | Actor loss: -0.37 | Critic loss: 9.83 | Entropy loss: -0.0154  | Total Loss: 9.45 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3


Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 42.17. Model has been saved-----
Training  | Episode: 325/200000  | Episode Reward: 46.0  | Average Reward 42.17  | Actor loss: -0.05 | Critic loss: 5.04 | Entropy loss: -0.0103  | Total Loss: 4.98 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 42.835. Model has been saved-----
Training  | Episode: 334/200000  | Episode Reward: 63.0  | Average Reward 42.84  | Actor loss: 0.10 | Critic loss: 13.62 | Entropy loss: -0.0100  | Total Loss: 13.70 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 344/200000  | Episode Reward: 24.0  | Average Reward 43.06  | Actor loss: -0.65 | Critic loss: 12.08 | Entropy loss: -0.0252  | Total Loss: 11.40 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Step: 250
Training  | Episode: 353/200000  | Episode Reward: 50.0  | Average Reward 42.98  | Actor loss: -0.11 | Critic loss: 14.99 | Entropy loss: -0.0170  | Total Loss: 14.86 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 354/200000  | Episode Reward: 18.0  | Aver

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 43.69. Model has been saved-----
Training  | Episode: 362/200000  | Episode Reward: 37.0  | Average Reward 43.69  | Actor loss: -0.06 | Critic loss: 7.73 | Entropy loss: -0.0113  | Total Loss: 7.66 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal step

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 372/200000  | Episode Reward: 40.0  | Average Reward 43.13  | Actor loss: -0.20 | Critic loss: 6.06 | Entropy loss: -0.0184  | Total Loss: 5.84 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 382/200000  | Episode Reward: 60.0  | Average Reward 43.27  | Actor loss: -0.04 | Critic loss: 12.43 | Entropy loss: -0.0167  | Total Loss: 12.37 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 391/200000  | Episode Reward: 56.0  | Average Reward 43.47  | Actor loss: 0.06 | Critic loss: 9.96 | Entropy loss: -0.0231  | Total Loss: 9.99 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 392/200000  | Episode Reward: 49.0  | Average Reward 43.45  | Actor

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 44.075. Model has been saved-----
Training  | Episode: 401/200000  | Episode Reward: 88.0  | Average Reward 44.08  | Actor loss: 0.53 | Critic loss: 13.34 | Entropy loss: -0.0145  | Total Loss: 13.86 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 410/200000  | Episode Reward: 34.0  | Average Reward 44.10  | Actor loss: -0.11 | Critic loss: 9.97 | Entropy loss: -0.0192  | Total Loss: 9.84 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 420/200000  | Episode Reward: 30.0  | Average Reward 44.41  | Actor loss: -0.16 | Critic loss: 6.63 | Entropy loss: -0.0154  | Total Loss: 6.45 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 44.595. Model has been saved-----
Training  | Episode: 430/200000  | Episode Reward: 63.0  | Average Reward 44.59  | Actor loss: 0.11 | Critic loss: 11.31 | Entropy loss: -0.0176  | Total Loss: 11.40 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 45.605. Model has been saved-----
Training  | Episode: 439/200000  | Episode Reward: 69.0  | Average Reward 45.60  | Actor loss: 0.13 | Critic loss: 13.96 | Entropy loss: -0.0145  | Total Loss: 14.08 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 440/200000  | Episode Reward: 49.0  | Ave

Training  | Episode: 448/200000  | Episode Reward: 73.0  | Average Reward 45.70  | Actor loss: 0.21 | Critic loss: 15.03 | Entropy loss: -0.0122  | Total Loss: 15.22 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 458/200000  | Episode Reward: 49.0  | Average Reward 45.34  | Actor loss: 0.40 | Critic loss: 9.73 | Entropy loss: -0.0180  | Total Loss: 10.10 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 468/200000  | Episode Reward: -5.0  | Average Reward 44.38  | Actor loss: -0.88 | Critic loss: 13.88 | Entropy loss: -0.0169  | Total Loss: 12.99 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 478/200000  | Episode Reward: 43.0  | Average Reward 44.45  | Actor loss: -0.71 | Critic loss: 8.84 | Entropy loss: -0.0230  | Total Loss: 8.11 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps


Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 488/200000  | Episode Reward: 47.0  | Average Reward 44.23  | Actor loss: -0.02 | Critic loss: 7.24 | Entropy loss: -0.0193  | Total Loss: 7.20 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 497/200000  | Episode Reward: 36.0  | Average Reward 44.07  | Actor loss: -0.28 | Critic loss: 6.73 | Entropy loss: -0.0142  | Total Loss: 6.44 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 507/200000  | Episode Reward: 34.0  | Average Reward 44.09  | Actor loss: -0.15 | Critic loss: 6.28 | Entropy loss: -0.0076  | Total Loss: 6.12 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 517/200000  | Episode Reward: 48.0  | Average Reward 44.35  | Actor loss: 0.14 | Critic loss: 5.40 | Entropy loss: -0.0157  | Total Loss: 5.52 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 527/200000  | Episode Reward: 56.0  | Average Reward 43.94  | Actor loss: 0.11 | Critic loss: 7.33 | Entropy loss: -0.0126  | Total Loss: 7.43 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0


Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 537/200000  | Episode Reward: 44.0  | Average Reward 43.84  | Actor loss: -0.17 | Critic loss: 7.41 | Entropy loss: -0.0134  | Total Loss: 7.23 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps


Step: 250
Training  | Episode: 546/200000  | Episode Reward: 46.0  | Average Reward 44.79  | Actor loss: -0.13 | Critic loss: 5.93 | Entropy loss: -0.0141  | Total Loss: 5.78 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 547/200000  | Episode Reward: 46.0  | Average Reward 44.99  | Actor loss: 0.04 | Critic loss: 5.64 | Entropy loss: -0.0128  | Total Loss: 5.67 | Total Steps: 250
--- t

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 556/200000  | Episode Reward: 34.0  | Average Reward 45.47  | Actor loss: -0.19 | Critic loss: 6.04 | Entropy loss: -0.0158  | Total Loss: 5.84 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 565/200000  | Episode Reward: 27.0  | Average Reward 45.49  | Actor loss: -0.54 | Critic loss: 13.71 | Entropy loss: -0.0127  | Total Loss: 13.16 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 566/200000  | Episode Reward: 33.0  | Average Reward 45.49  | Actor loss: -0.08 | Critic loss: 4.81 | Entropy lo

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 575/200000  | Episode Reward: 34.0  | Average Reward 45.73  | Actor loss: -0.35 | Critic loss: 5.49 | Entropy loss: -0.0214  | Total Loss: 5.12 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 45.995. Model has been saved-----
Training  | Episode: 584/200000  | Episode Reward: 53.0  | Average Reward 45.99  | Actor loss: -0.21 | Critic loss: 8.37 | Entropy loss: -0.0161  | Total Loss: 8.14 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 46.33. Model has been saved-----
Training  | Episode: 593/200000  | Episode Reward: 72.0  | Average Reward 46.33  | Actor loss: 0.21 | Critic loss: 11.32 | Entropy loss: -0.0159  | Total Loss: 11.51 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 594/200000  | Episode Reward: 18.0  | Avera

Decision Step reward: -3
Step: 250
Training  | Episode: 602/200000  | Episode Reward: 47.0  | Average Reward 46.35  | Actor loss: -0.01 | Critic loss: 7.36 | Entropy loss: -0.0053  | Total Loss: 7.35 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 603/200000  | Episode Reward: 66.0  | Aver

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 611/200000  | Episode Reward: 40.0  | Average Reward 46.74  | Actor loss: -0.12 | Critic loss: 7.71 | Entropy loss: -0.0109  | Total Loss: 7.58 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 47.015. Model has been saved-----
Training  | Episode: 621/200000  | Episode Reward: 30.0  | Average Reward 47.02  | Actor loss: -0.34 | Critic loss: 6.96 | Entropy loss: -0.0135  | Total Loss: 6.61 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3


Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 630/200000  | Episode Reward: 28.0  | Average Reward 46.82  | Actor loss: -0.22 | Critic loss: 16.35 | Entropy loss: -0.0122  | Total Loss: 16.11 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Traini

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 640/200000  | Episode Reward: 15.0  | Average Reward 46.19  | Actor loss: -0.48 | Critic loss: 7.45 | Entropy loss: -0.0150  | Total Loss: 6.96 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 649/200000  | Episode Reward: 67.0  | Average Reward 46.27  | Actor loss: 0.26 | Critic loss: 15.44 | Entropy loss: -0.0183  | Total Loss: 15.68 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 650/200000  | Episode Reward: 49.0  | Average Reward 46.34  | Actor loss: -0.01 | Critic loss: 9.42 | Entropy loss: -0.0136  | Total Loss: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 659/200000  | Episode Reward: 63.0  | Average Reward 46.72  | Actor loss: -0.07 | Critic loss: 8.74 | Entropy loss: -0.0097  | Total Loss: 8.66 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 47.77. Model has been saved-----
Training  | Episode: 668/200000  | Episode Reward: 56.0  | Average Reward 47.77  | Actor loss: 0.01 | Critic loss: 7.72 | Entropy loss: -0.0118  | Total Loss: 7.72 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Training  | Episode: 676/200000  | Episode Reward: 82.0  | Average Reward 47.91  | Actor loss: 0.06 | Critic loss: 19.28 | Entropy loss: -0.0156  | Total Loss: 19.33 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 48.055. Model has been saved--

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 685/200000  | Episode Reward: 73.0  | Average Reward 47.72  | Actor loss: 0.28 | Critic loss: 12.95 | Entropy loss: -0.0192  | Total Loss: 13.22 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 695/200000  | Episode Reward: 45.0  | Average Reward 47.90  | Actor loss: -0.07 | Critic loss: 3.37 | Entropy loss: -0.0121  | Total Loss: 3.29 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 704/200000  | Episode Reward: 43.0  | Average Reward 48.02  | Actor loss: 0.28 | Critic loss: 11.05 | Entropy loss: -0.0163  | Total Loss: 11.31 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 705/200000  | Episode Reward: 46.0  |

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 714/200000  | Episode Reward: 88.0  | Average Reward 48.06  | Actor loss: 0.33 | Critic loss: 16.32 | Entropy loss: -0.0147  | Total Loss: 16.64 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 715/200000  | 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 724/200000  | Episode Reward: 41.0  | Average Reward 48.17  | Actor loss: -0.14 | Critic loss: 6.57 | Entropy loss: -0.0069  | Total Loss: 6.43 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 48.38. Model has been saved-----
Training  | Episode: 733/200000  | Episode Reward: 85.0  | Average Reward 48.38  | Actor loss: 0.45 | Critic loss: 12.92 | Entropy loss: -0.0145  | Total Loss: 13.35 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 742/200000  | Episode Reward: 66.0  | Average Reward 48.42  | Actor loss: 0.20 | Critic loss: 8.41 | Entropy loss: -0.0095  | Total Loss: 8.60 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 751/200000  | Episode Reward: 50.0  | Average Reward 48.59  | Actor loss: -0.04 | Critic loss: 6.11 | Entropy loss: -0.0052  | Total Loss: 6.07 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 752/200000  | Episode Reward: 27.0  | Average Reward 48.35  | Actor loss: -0.11 | Critic loss

Step: 250
Training  | Episode: 760/200000  | Episode Reward: 60.0  | Average Reward 48.28  | Actor loss: -0.01 | Critic loss: 10.74 | Entropy loss: -0.0163  | Total Loss: 10.71 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 761/200000  | Episode Reward: 69.0  | Average Reward 48.52  | Actor loss: 0.74 | Critic 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 770/200000  | Episode Reward: 59.0  | Average Reward 48.33  | Actor loss: -0.09 | Critic loss: 7.15 | Entropy loss: -0.0133  | Total Loss: 7.05 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 779/200000  | Episode Reward: 60.0  | Average Reward 48.88  | Actor loss: -0.18 | Critic loss: 13.50 | Entropy loss: -0.0120  | Total Loss: 13.31 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 780/200000  | Episode Reward: 26.0  | Average Reward 48.77  | Actor loss: -0.51 | Critic los

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 789/200000  | Episode Reward: 41.0  | Average Reward 48.48  | Actor loss: -0.19 | Critic loss: 6.39 | Entropy loss: -0.0098  | Total Loss: 6.19 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 798/200000  | Episode Reward: 56.0  | Average Reward 47.95  | Actor loss: 0.05 | Critic loss: 5.53 | Entropy loss: -0.0176  | Total Loss: 5.56 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 799/200000  

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 808/200000  | Episode Reward: 52.0  | Average Reward 47.80  | Actor loss: 0.09 | Critic loss: 6.40 | Entropy loss: -0.0145  | Total Loss: 6.47 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 817/200000  | Episode Reward: 47.0  | Average Reward 48.20  | Actor loss: -0.27 | Critic loss: 7.85 | Entropy loss: -0.0092  | Total Loss: 7.57 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 827/200000  | Episode Reward: 49.0  | Average Reward 48.68  | Actor loss: -0.01 | Critic loss: 5.73 | Entropy loss: -0.0122  | Total Loss: 5.71 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 837/200000  | Episode Reward: 47.0  | Average Reward 48.01  | Actor loss: -0.32 | Critic loss: 8.71 | Entropy loss: -0.0135  | Total Loss: 8.38 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps


Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 846/200000  | Episode Reward: 50.0  | Average Reward 48.45  | Actor loss: -0.29 | Critic loss: 9.25 | Entropy loss: -0.0168  | Total Loss: 8.95 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 855/200000  | Episode Reward: 53.0  | Average Reward 48.67  | Actor loss: 0.01 | Critic loss: 8.59 | Entropy loss: -0.0068  | Total Loss: 8.59 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 864/200000  | Episode Reward: 79.0  | Average Reward 48.45  | Actor loss: 0.15 | Critic loss: 17.28 | Entropy loss: -0.0082  | Total Loss: 17.42 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 865/200000  | Episode Reward: 62.0  | Average Reward 48.42  | 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 874/200000  | Episode Reward: 47.0  | Average Reward 48.35  | Actor loss: -0.26 | Critic loss: 6.22 | Entropy loss: -0.0100  | Total Loss: 5.94 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 883/200000  | Episode Reward: 24.0  | Average Reward 48.82  | Actor loss: -0.39 | Critic loss: 6.35 | Entropy loss: -0.0120  | Total Loss: 5.94 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 893/200000  | Episode Reward: 66.0  | Average Reward 48.67  | Actor loss: 0.08 | Critic loss: 8.34 | Entropy loss: -0.0186  | Total Loss: 8.40 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 902/200000  | Episode Reward: 24.0  | Average Reward 48.48  | Actor loss: -0.39 | Critic loss: 8.30 | Entropy loss: -0.0166  | Total Loss: 7.89 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 912/200000  | Episode Reward: 50.0  | Average Reward 48.61  | Actor loss: 0.12 | Critic loss: 7.38 | Entropy loss: -0.0131  | Total Loss: 7.49 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 921/200000  | Episode Reward: 43.0  | Average Reward 48.52  | Actor loss: -0.12 | Critic loss: 5.97 | Entropy loss: -0.0088  | Total Loss: 5.84 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 930/200000  | Episode Reward: 36.0  | Average Reward 49.05  | Actor loss: -0.21 | Critic loss: 9.49 | Entropy loss: -0.0063  | Total Loss: 9.27 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 939/200000  | Episode Reward: 30.0  | Average Reward 48.45  | Actor loss: -0.09 | Critic loss: 8.26 | Entropy loss: -0.0057  | Total Loss: 8.17 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 949/200000  | Episode Reward: 66.0  | Average Reward 48.47  | Actor loss: 0.19 | Critic loss: 10.34 | Entropy loss: -0.0099  | Total Loss: 10.52 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 959/200000  | Episode Reward: 43.0  | Average Reward 48.65  | Actor loss: 0.10 | Critic loss: 3.97 | Entropy loss: -0.0171  | Total Loss: 4.05 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 968/200000  | Episode Reward: 76.0  | Average Reward 48.79  | Actor loss: 0.17 | Critic loss: 16.47 | Entropy loss: -0.0129  | Total Loss: 16.62 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 978/200000  | Episode Reward: 37.0  | Average Reward 48.20  | Actor loss: -0.07 | Critic loss: 6.19 | Entropy loss: -0.0068  | Total Loss: 6.12 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 987/200000  | Episode Reward: 76.0  | Average Reward 48.94  | Actor loss: 0.25 | Critic loss: 8.56 | Entropy loss: -0.0156  | Total Loss: 8.80 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal st

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 997/200000  | Episode Reward: 78.0  | Average Reward 48.87  | Actor loss: 0.17 | Critic loss: 8.78 | Entropy loss: -0.0099  | Total Loss: 8.94 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1006/200000  | Episode Reward: 69.0  | Average Reward 48.88  | Actor loss: 0.03 | Critic loss: 10.44 | Entropy loss: -0.0089  | Total Loss: 10.46 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1007/200000 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1016/200000  | Episode Reward: 56.0  | Average Reward 48.61  | Actor loss: 0.04 | Critic loss: 9.10 | Entropy loss: -0.0120  | Total Loss: 9.13 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1025/200000  | Episode Reward: 72.0  | Average Reward 48.38  | Actor loss: 0.74 | Critic loss: 10.10 | Entropy loss: -0.0196  | Total Loss: 10.82 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1035/200000  | Episode Reward: 76.0  | Average Reward 48.93  | Actor loss: 0.85 | Critic loss: 15.58 | Entropy loss: -0.0205  | Total Loss: 16.41 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1043/200000  | Episode Reward: 34.0  | Average Reward 49.69  | Actor loss: -0.20 | Critic loss: 14.14 | Entropy loss: -0.0103  | Total Loss: 13.93 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1052/200000  | Episode Reward: 31.0  | Average Reward 49.52  | Actor loss: -0.42 | Critic loss: 10.76 | Entropy loss: -0.0120  | Total Loss: 10.33 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step r

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1062/200000  | Episode Reward: 34.0  | Average Reward 49.08  | Actor loss: 0.05 | Critic loss: 12.56 | Entropy loss: -0.0103  | Total Loss: 12.61 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1072/200000  | Episode Reward: 18.0  | Average Reward 48.20  | Actor loss: -0.22 | Critic loss: 6.57 | Entropy loss: -0.0129  | Total Loss: 6.34 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal st

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1081/200000  | Episode Reward: 50.0  | Average Reward 47.84  | Actor loss: -0.05 | Critic loss: 5.84 | Entropy loss: -0.0068  | Total Loss: 5.78 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1082/200000  | Episode Reward: 33.0  |

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1091/200000  | Episode Reward: 59.0  | Average Reward 47.91  | Actor loss: 0.16 | Critic loss: 12.71 | Entropy loss: -0.0124  | Total Loss: 12.86 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1100/200000  | Episode Reward: 53.0  | Average Reward 48.40  | Actor loss: 0.35 | Critic loss: 5.51 | Entropy loss: -0.0148  | Total Loss: 5.85 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Step: 250
Training  | Episode: 1109/200000  | Episode Reward: 60.0  | Average Reward 48.90  | Actor loss: -0.18 | Critic loss: 8.97 | Entropy loss: -0.0125  | Total Loss: 8.77 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1110/200000  | Episode Reward: 20.0  | Average Reward 48.73  | Actor loss: -0.17 | Critic loss: 9.22 | Entropy loss: -0.0063  | Total Loss: 9.05 | Total Steps: 250
--- target colour: yellow, tar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1119/200000  | Episode Reward: 46.0  | Average Reward 48.86  | Actor loss: -0.08 | Critic loss: 7.44 | Entropy loss: -0.0104  | Total Loss: 7.34 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1128/200000  | Episode Reward: 38.0  | Average Reward 48.58  | Actor loss: -0.17 | Critic loss: 12.25 | Entropy loss: -0.0106  | Total Loss: 12.07 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1138/200000  | Episode Reward: 72.0  | Average Reward 48.39  | Actor loss: 0.10 | Critic loss: 9.39 | Entropy loss: -0.0094  | Total Loss: 9.48 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1148/200000  | Episode Reward: 66.0  | Average Reward 48.65  | Actor loss: 0.25 | Critic loss: 8.19 | Entropy loss: -0.0139  | Total Loss: 8.43 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1157/200000  | Episode Reward: 17.0  | Average Reward 48.62  | Actor loss: -0.29 | Critic loss: 6.28 | Entropy loss: -0.0135  | Total Loss: 5.98 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1167/200000  | Episode Reward: 34.0  | Average Reward 48.17  | Actor loss: -0.10 | Critic loss: 6.22 | Entropy loss: -0.0100  | Total Loss: 6.11 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1177/200000  | Episode Reward: 30.0  | Average Reward 48.64  | Actor loss: -0.72 | Critic loss: 8.61 | Entropy loss: -0.0194  | Total Loss: 7.87 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: 

Decision Step reward: -3
Step: 250
Training  | Episode: 1186/200000  | Episode Reward: 76.0  | Average Reward 48.76  | Actor loss: 0.19 | Critic loss: 13.28 | Entropy loss: -0.0121  | Total Loss: 13.46 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1187/200000  | Episode Reward: 56.0  | Average Reward 48.66  | Actor loss: -0.18 | Crit

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1196/200000  | Episode Reward: 40.0  | Average Reward 49.11  | Actor loss: -0.12 | Critic loss: 7.87 | Entropy loss: -0.0105  | Total Loss: 7.74 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1206/200000  | Episode Reward: 69.0  | Average Reward 48.52  | Actor loss: 0.02 | Critic loss: 10.09 | Entropy loss: -0.0099  | Total Loss: 10.10 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1216/200000  | Episode Reward: 53.0  | Average Reward 48.39  | Actor loss: -0.00 | Critic loss: 8.63 | Entropy loss: -0.0106  | Total Loss: 8.62 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1225/200000  | Episode Reward: 66.0  | Average Reward 48.64  | Actor loss: 0.17 | Critic loss: 9.54 | Entropy loss: -0.0094  | Total Loss: 9.70 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1235/200000  | Episode Reward: 69.0  | Average Reward 48.73  | Actor loss: 0.05 | Critic loss: 6.71 | Entropy loss: -0.0160  | Total Loss: 6.74 | Total Steps: 250
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1244/200000  | Episode Reward: 46.0  | Average Reward 48.39  | Actor loss: -0.06 | Critic loss: 6.58 | Entropy loss: -0.0120  | Total Loss: 6.51 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1253/200000  | Episode Reward: 40.0  | Average Reward 48.30  | Actor loss: -0.24 | Critic loss: 5.19 | Entropy loss: -0.0147  | Total Loss: 4.93 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1263/200000  | Episode Reward: 54.0  | Average Reward 48.71  | Actor loss: -0.02 | Critic loss: 8.15 | Entropy loss: -0.0131  | Total Loss: 8.11 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1272/200000  | Episode Reward: 40.0  | Average Reward 48.84  | Actor loss: -0.01 | Critic loss: 10.83 | Entropy loss: -0.0138  | Total Loss: 10.81 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1282/200000  | Episode Reward: 33.0  | Average Reward 49.38  | Actor loss: -0.26 | Critic loss: 5.06 | Entropy loss: -0.0096  | Total Loss: 4.80 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1291/200000  | Episode Reward: 43.0  | Average Reward 49.75  | Actor loss: -0.09 | Critic loss: 4.50 | Entropy loss: -0.0098  | Total Loss: 4.39 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1300/200000  | Episode Reward: 79.0  | Average Reward 49.62  | Actor loss: 0.13 | Critic loss: 10.53 | Entropy loss: -0.0056  | Total Loss: 10.65 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1309/200000  | Episode Reward: 48.0  | Average Reward 49.59  | Actor loss: -0.12 | Critic loss: 8.39 | Entropy loss: -0.0097  | Total Loss: 8.26 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | E

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1319/200000  | Episode Reward: 33.0  | Average Reward 49.74  | Actor loss: -0.28 | Critic loss: 5.87 | Entropy loss: -0.0097  | Total Loss: 5.58 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
T

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1328/200000  | Episode Reward: 52.0  | Average Reward 49.77  | Actor loss: -0.08 | Critic loss: 7.12 | Entropy loss: -0.0093  | Total Loss: 7.04 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1329/200000

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1337/200000  | Episode Reward: 50.0  | Average Reward 50.56  | Actor loss: -0.08 | Critic loss: 8.90 | Entropy loss: -0.0161  | Total Loss: 8.81 | Total Steps: 250
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 50.8. Model has been saved-----
Training  | Episode: 1346/200000  | Episode Reward: 43.0  | Average Reward 50.80  | Actor loss: -0.24 | Critic loss: 4.95 | Entropy loss: -0.0151  | Total Loss: 4.69 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1355/200000  | Episode Reward: 31.0  | Average Reward 50.88  | Actor loss: -0.06 | Critic loss: 7.07 | Entropy loss: -0.0052  | Total Loss: 7.01 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1364/200000  | Episode Reward: 46.0  | Average Reward 51.31  | Actor loss: -0.24 | Critic loss: 7.96 | Entropy loss: -0.0124  | Total Loss: 7.71 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal step

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1373/200000  | Episode Reward: 53.0  | Average Reward 51.12  | Actor loss: -0.10 | Critic loss: 4.38 | Entropy loss: -0.0148  | Total Loss: 4.26 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1383/200000  | Episode Reward: 63.0  | Average Reward 51.00  | Actor loss: 0.02 | Critic loss: 10.23 | Entropy loss: -0.0107  | Total Loss: 10.23 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1392/200000  | Episode Reward: 99.0  | Average Reward 51.60  | Actor loss: 0.15 | Critic loss: 22.79 | Entropy loss: -0.0083  | Total Loss: 22.93 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Training  | Episode: 1401/200000  | Episode Reward: 50.0  | Average Reward 51.32  | Actor loss: 0.02 | Critic loss: 9.34 | Entropy loss: -0.0122  | Total Loss: 9.35 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1402/200000  | Episode Reward: 68.0  | Average Reward 51.44  | Actor loss: 0.20 | Critic loss: 6.76 | Entropy loss: -0.0122  | Total Loss: 6.95 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1411/200000  | Episode Reward: 37.0  | Average Reward 51.27  | Actor loss: -0.15 | Critic loss: 7.25 | Entropy loss: -0.0082  | Total Loss: 7.09 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal step

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1421/200000  | Episode Reward: 56.0  | Average Reward 51.52  | Actor loss: -0.11 | Critic loss: 4.09 | Entropy loss: -0.0136  | Total Loss: 3.96 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal ste

Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1430/200000  | Episode Reward: 24.0  | Average Reward 51.03  | Actor loss: -0.09 | Critic loss: 4.43 | Entropy loss: -0.0039  | Total Loss: 4.34 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1440/200000  | Episode Reward: 106.0  | Average Reward 50.91  | Actor loss: 0.67 | Critic loss: 32.37 | Entropy loss: -0.0135  | Total Loss: 33.03 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1449/200000  | Episode Reward: 79.0  | Average Reward 50.66  | Actor loss: 0.01 | Critic loss: 11.50 | Entropy loss: -0.0074  | Total Loss: 11.51 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1459/200000  | Episode Reward: 28.0  | Average Reward 50.24  | Actor loss: 0.06 | Critic loss: 9.15 | Entropy loss: -0.0083  | Total Loss: 9.20 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps


Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1468/200000  | Episode Reward: 36.0  | Average Reward 50.70  | Actor loss: -0.11 | Critic loss: 7.75 | Entropy loss: -0.0151  | Total Loss: 7.62 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1478/200000  | Episode Reward: 66.0  | Average Reward 50.66  | Actor loss: 0.24 | Critic loss: 8.65 | Entropy loss: -0.0109  | Total Loss: 8.88 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1487/200000  | Episode Reward: 82.0  | Average Reward 50.75  | Actor loss: 0.03 | Critic loss: 12.63 | Entropy loss: -0.0057  | Total Loss: 12.65 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1496/200000  | Episode Reward: 66.0  | Average Reward 51.01  | Actor loss: 0.08 | Critic loss: 7.29 | Entropy loss: -0.0118  | Total Loss: 7.36 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1505/200000  | Episode Reward: 47.0  | Average Reward 51.14  | Actor loss: -0.14 | Critic loss: 9.95 | Entropy loss: -0.0088  | Total Loss: 9.80 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1514/200000  | Episode Reward: 63.0  | Average Reward 51.47  | Actor loss: -0.04 | Critic loss: 7.06 | Entropy loss: -0.0048  | Total Loss: 7.01 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1524/200000  | Episode Reward: 46.0  | Average Reward 51.19  | Actor loss: -0.06 | Critic loss: 4.43 | Entropy loss: -0.0075  | Total Loss: 4.36 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1533/200000  | Episode Reward: 75.0  | Average Reward 51.36  | Actor loss: 0.28 | Critic loss: 13.15 | Entropy loss: -0.0129  | Total Loss: 13.42 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1542/200000  | Episode Reward: 34.0  | Average Reward 50.69  | Actor loss: -0.04 | Critic loss: 7.94 | Entropy loss: -0.0078  | Total Loss: 7.90 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1543/200000  | Episode Reward: 72.0  | 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1552/200000  | Episode Reward: 37.0  | Average Reward 50.27  | Actor loss: -0.06 | Critic loss: 6.35 | Entropy loss: -0.0089  | Total Loss: 6.28 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1562/200000  | Episode Reward: 34.0  | Average Reward 49.67  | Actor loss: -0.20 | Critic loss: 6.56 | Entropy loss: -0.0086  | Total Loss: 6.35 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Step: 250
Training  | Episode: 1571/200000  | Episode Reward: 82.0  | Average Reward 50.20  | Actor loss: 0.53 | Critic loss: 18.45 | Entropy loss: -0.0143  | Total Loss: 18.96 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1572/200000  | Episode Reward: 53.0  | Average Reward 50.40  | Ac

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1581/200000  | Episode Reward: 59.0  | Average Reward 50.47  | Actor loss: -0.03 | Critic loss: 10.22 | Entropy loss: -0.0124  | Total Loss: 10.18 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1590/200000  | Episode Reward: 37.0  | Average Reward 49.72  | Actor loss: -0.23 | Critic loss: 6.87 | Entropy loss: -0.0094  | Total Loss: 6.63 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1600/200000  | Episode Reward: 53.0  | Average Reward 49.87  | Actor loss: -0.09 | Critic loss: 8.91 | Entropy loss: -0.0104  | Total Loss: 8.80 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1609/200000  | Episode Reward: 63.0  | Average Reward 50.02  | Actor loss: -0.09 | Critic loss: 10.13 | Entropy loss: -0.0103  | Total Loss: 10.02 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1619/200000  | Episode Reward: 46.0  | Average Reward 50.39  | Actor loss: -0.20 | Critic loss: 8.51 | Entropy loss: -0.0117  | Total Loss: 8.30 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1629/200000  | Episode Reward: 37.0  | Average Reward 49.82  | Actor loss: -0.09 | Critic loss: 8.34 | Entropy loss: -0.0098  | Total Loss: 8.25 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1638/200000  | Episode Reward: 89.0  | Average Reward 50.49  | Actor loss: 0.57 | Critic loss: 21.11 | Entropy loss: -0.0071  | Total Loss: 21.67 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1639/200000  | Ep

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1648/200000  | Episode Reward: 53.0  | Average Reward 50.41  | Actor loss: 0.04 | Critic loss: 8.55 | Entropy loss: -0.0098  | Total Loss: 8.58 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1657/200000  | Episode Reward: 37.0  | Average Reward 50.62  | Actor loss: -0.31 | Critic loss: 6.58 | Entropy loss: -0.0089  | Total Loss: 6.26 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1667/200000  | Episode Reward: 31.0  | Average Reward 50.78  | Actor loss: -0.34 | Critic loss: 8.48 | Entropy loss: -0.0121  | Total Loss: 8.13 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1676/200000  | Episode Reward: 50.0  | Average Reward 51.02  | Actor loss: -0.11 | Critic loss: 11.38 | Entropy loss: -0.0064  | Total Loss: 11.26 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1685/200000  | Episode Reward: 24.0  | Average Reward 50.99  | Actor loss: -0.51 | Critic loss: 7.28 | Entropy loss: -0.0105  | Total Loss: 6.76 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1694/200000  | Episode Reward: 111.0  | Average Reward 51.27  | Actor loss: 0.62 | Critic loss: 29.05 | Entropy loss: -0.0100  | Total Loss: 29.66 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1704/200000  | Episode Reward: 46.0  | Average Reward 50.48  | Actor loss: -0.24 | Critic loss: 6.75 | Entropy loss: -0.0092  | Total Loss: 6.51 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1713/200000  | Episode Reward: 49.0  | Average Reward 50.25  | Actor loss: -0.15 | Critic loss: 7.95 | Entropy loss: -0.0066  | Total Loss: 7.79 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1723/200000  | Episode Reward: 53.0  | Average Reward 50.78  | Actor loss: -0.15 | Critic loss: 5.45 | Entropy loss: -0.0095  | Total Loss: 5.29 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1733/200000  | Episode Reward: 73.0  | Average Reward 50.95  | Actor loss: 0.29 | Critic loss: 12.83 | Entropy loss: -0.0108  | Total Loss: 13.11 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step r

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1743/200000  | Episode Reward: 60.0  | Average Reward 51.05  | Actor loss: 0.09 | Critic loss: 8.96 | Entropy loss: -0.0085  | Total Loss: 9.03 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1752/200000  | Episode Reward: 16.0  | Average Reward 51.79  | Actor loss: -0.34 | Critic loss: 11.31 | Entropy loss: -0.0087  | Total Loss: 10.96 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 52.36. Model has been saved-----
Training  | Episode: 1761/200000  | Episode Reward: 33.0  | Average Reward 52.36  | Actor loss: -0.27 | Critic loss: 5.41 | Entropy loss: -0.0088  | Total Loss: 5.13 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1770/200000  | Episode Reward: 62.0  | Average Reward 52.24  | Actor loss: 0.07 | Critic loss: 11.32 | Entropy loss: -0.0135  | Total Loss: 11.37 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1780/200000  | Episode Reward: 24.0  | Average Reward 52.15  | Actor loss: -0.70 | Critic loss: 10.45 | Entropy loss: -0.0156  | Total Loss: 9.74 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 53.125. Model has been saved-----
Training  | Episode: 1788/200000  | Episode Reward: 60.0  | Average Reward 53.12  | Actor loss: -0.12 | Critic loss: 14.08 | Entropy loss: -0.0118  | Total Loss: 13.94 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1798/200000  | Episode Reward: 41.0  | Average Reward 53.07  | Actor loss: -0.31 | Critic loss: 10.05 | Entropy loss: -0.0133  | Total Loss: 9.73 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1807/200000  | Episode Reward: 50.0  | Average Reward 52.99  | Actor loss: -0.12 | Critic loss: 8.00 | Entropy loss: -0.0081  | Total Loss: 7.88 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1816/200000  | Episode Reward: 56.0  | Average Reward 52.86  | Actor loss: -0.05 | Critic loss: 6.78 | Entropy loss: -0.0117  | Total Loss: 6.72 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1817/200000  | Episode Reward: 59.0  | Average Reward 52.89  | Actor loss: 0.01 | Crit

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 53.39. Model has been saved-----
Training  | Episode: 1826/200000  | Episode Reward: 98.0  | Average Reward 53.39  | Actor loss: 0.44 | Critic loss: 17.50 | Entropy loss: -0.0113  | Total Loss: 17.93 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1835/200000  | Episode Reward: 56.0  | Average Reward 53.40  | Actor loss: 0.04 | Critic loss: 7.40 | Entropy loss: -0.0099  | Total Loss: 7.42 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 53.725. Model has been saved-----
Training  | Episode: 1844/200000  | Episode Reward: 57.0  | Average Reward 53.73  | Actor loss: -0.04 | Critic loss: 6.83 | Entropy loss: -0.0128  | Total Loss: 6.77 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1853/200000  | Episode Reward: 66.0  | Average Reward 53.71  | Actor loss: 0.05 | Critic loss: 11.71 | Entropy loss: -0.0144  | Total Loss: 11.74 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1854/200000  | Episode Reward: 39.0  | Average Reward 53.51  | Actor loss: -0.10 | Critic loss: 4.34 | Entropy loss: -0.0045  | Total Loss: 4.24 | Total Steps: 250

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1863/200000  | Episode Reward: 31.0  | Average Reward 52.81  | Actor loss: -0.27 | Critic loss: 7.13 | Entropy loss: -0.0124  | Total Loss: 6.85 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1873/200000  | Episode Reward: 30.0  | Average Reward 53.03  | Actor loss: -0.24 | Critic loss: 6.73 | Entropy loss: -0.0074  | Total Loss: 6.48 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1882/200000  | Episode Reward: 69.0  | Average Reward 53.16  | Actor loss: 0.26 | Critic loss: 7.96 | Entropy loss: -0.0152  | Total Loss: 8.21 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Step: 250
Training  | Episode: 1891/200000  | Episode Reward: 63.0  | Average Reward 53.44  | Actor loss: 0.03 | Critic loss: 7.81 | Entropy loss: -0.0095  | Total Loss: 7.83 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1892/200000  | Episode Reward: 65.0  | Average Reward 53.50  | Actor loss: 0.14 | Critic loss: 7.31 | Entropy loss: -0.0120  | Total Loss: 7.44 | Total Steps: 250
--- target colour: black, 

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1901/200000  | Episode Reward: 79.0  | Average Reward 53.59  | Actor loss: 0.07 | Critic loss: 9.71 | Entropy loss: -0.0101  | Total Loss: 9.77 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1910/200000  | Episode Reward: 56.0  | Average Reward 53.45  | Actor loss: -0.12 | Critic loss: 5.29 | Entropy loss: -0.0092  | Total Loss: 5.15 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1911/200000  | Episode Reward: 33.0  | Average Reward 53.39  | Actor loss: -0.22 | Critic 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1920/200000  | Episode Reward: 47.0  | Average Reward 53.09  | Actor loss: -0.16 | Critic loss: 9.86 | Entropy loss: -0.0136  | Total Loss: 9.69 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1930/200000  | Episode Reward: 56.0  | Average Reward 53.23  | Actor loss: -0.07 | Critic loss: 7.21 | Entropy loss: -0.0044  | Total Loss: 7.13 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal step

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1939/200000  | Episode Reward: 82.0  | Average Reward 53.84  | Actor loss: 0.08 | Critic loss: 18.34 | Entropy loss: -0.0052  | Total Loss: 18.41 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 1948/200000  | Episode Reward: 33.0  | Average Reward 53.16  | Actor loss: -0.08 | Critic loss: 7.60 | Entropy loss: -0.0058  | Total Loss: 7.52 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 1958/200000  | Episode Reward: 47.0  | Average Reward 52.90  | Actor loss: -0.10 | Critic loss: 8.56 | Entropy loss: -0.0122  | Total Loss: 8.45 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1967/200000  | Episode Reward: 72.0  | Average Reward 53.09  | Actor loss: 0.06 | Critic loss: 9.46 | Entropy loss: -0.0117  | Total Loss: 9.50 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3

Training  | Episode: 1976/200000  | Episode Reward: 40.0  | Average Reward 52.30  | Actor loss: -0.17 | Critic loss: 11.89 | Entropy loss: -0.0098  | Total Loss: 11.72 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1977/200000  | Episode Reward: 50.0  | Average Reward 52.15  | Actor loss

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1986/200000  | Episode Reward: 50.0  | Average Reward 51.53  | Actor loss: -0.12 | Critic loss: 6.27 | Entropy loss: -0.0123  | Total Loss: 6.14 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Step: 250
Training  | Episode: 1995/200000  | Episode Reward: 56.0  | Average Reward 51.49  | Actor loss: -0.06 | Critic loss: 6.12 | Entropy loss: -0.0072  | Total Loss: 6.05 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 1996/200000  | Episode Reward: 72.0  | Average Reward 51.65  | Actor loss: 0.06 | Critic loss: 12.02 | Entropy loss:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2005/200000  | Episode Reward: 34.0  | Average Reward 51.30  | Actor loss: -0.24 | Critic loss: 4.41 | Entropy loss: -0.0087  | Total Loss: 4.17 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2014/200000  | Episode Reward: 83.0  | Average Reward 51.97  | Actor loss: 0.08 | Critic loss: 13.87 | Entropy loss: -0.0103  | Total Loss: 13.94 | Total Steps: 250
--- target colour: blue, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2023/200000  | Episode Reward: 72.0  | Average Reward 52.03  | Actor loss: -0.02 | Critic loss: 10.83 | Entropy loss: -0.0117  | Total Loss: 10.79 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2024/200000  | Episode Reward: 62.0 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2033/200000  | Episode Reward: 30.0  | Average Reward 52.22  | Actor loss: -0.10 | Critic loss: 6.10 | Entropy loss: -0.0083  | Total Loss: 5.98 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2043/200000  | Episode Reward: 92.0  | Average Reward 51.81  | Actor loss: 0.23 | Critic loss: 21.39 | Entropy loss: -0.0087  | Total Loss: 21.61 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2052/200000  | Episode Reward: 40.0  | Average Reward 51.80  | Actor loss: -0.06 | Critic loss: 9.94 | Entropy loss: -0.0102  | Total Loss: 9.87 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2061/200000  | Episode Reward: 56.0  | Average Reward 52.70  | Actor loss: -0.04 | Critic loss: 8.12 | Entropy loss: -0.0066  | Total Loss: 8.08 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2070/200000  | Episode Reward: 50.0  | Average Reward 52.81  | Actor loss: 0.10 | Critic loss: 9.71 | Entropy loss: -0.0072  | Total Loss: 9.81 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | E

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2080/200000  | Episode Reward: 39.0  | Average Reward 52.63  | Actor loss: -0.16 | Critic loss: 9.30 | Entropy loss: -0.0054  | Total Loss: 9.13 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2089/200000  | Episode Reward: 70.0  | Average Reward 52.15  | Actor loss: -0.10 | Critic loss: 13.08 | Entropy loss: -0.0113  | Total Loss: 12.97 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2090/200000

Step: 250
Training  | Episode: 2098/200000  | Episode Reward: 73.0  | Average Reward 52.59  | Actor loss: -0.03 | Critic loss: 13.69 | Entropy loss: -0.0101  | Total Loss: 13.65 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2099/2000

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2108/200000  | Episode Reward: 37.0  | Average Reward 52.31  | Actor loss: -0.14 | Critic loss: 7.08 | Entropy loss: -0.0138  | Total Loss: 6.92 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2118/200000  | Episode Reward: 33.0  | Average Reward 51.80  | Actor loss: -0.07 | Critic loss: 4.32 | Entropy loss: -0.0054  | Total Loss: 4.24 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2127/200000  | Episode Reward: 33.0  | Average Reward 51.71  | Actor loss: -0.23 | Critic loss: 4.14 | Entropy loss: -0.0122  | Total Loss: 3.91 | Total Steps: 250
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2137/200000  | Episode Reward: 49.0  | Average Reward 50.98  | Actor loss: -0.02 | Critic loss: 3.67 | Entropy loss: -0.0062  | Total Loss: 3.64 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2147/200000  | Episode Reward: 46.0  | Average Reward 50.55  | Actor loss: 0.04 | Critic loss: 5.08 | Entropy loss: -0.0052  | Total Loss: 5.11 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2157/200000  | Episode Reward: 63.0  | Average Reward 50.56  | Actor loss: 0.51 | Critic loss: 14.15 | Entropy loss: -0.0146  | Total Loss: 14.65 | Total Steps: 250
--- target colour: green, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2166/200000  | Episode Reward: 72.0  | Average Reward 51.42  | Actor loss: 0.03 | Critic loss: 15.33 | Entropy loss: -0.0089  | Total Loss: 15.35 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2175/200000  | Episode Reward: 24.0  | Average Reward 51.20  | Actor loss: -0.20 | Critic loss: 6.12 | Entropy loss: -0.0116  | Total Loss: 5.91 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2185/200000  | Episode Reward: 41.0  | Average Reward 51.42  | Actor loss: -0.36 | Critic loss: 7.94 | Entropy loss: -0.0136  | Total Loss: 7.57 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2194/200000  | Episode Reward: 56.0  | Average Reward 51.80  | Actor loss: -0.01 | Critic loss: 4.99 | Entropy loss: -0.0086  | Total Loss: 4.98 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2203/200000  | Episode Reward: 59.0  | Average Reward 52.28  | Actor loss: 0.03 | Critic loss: 6.30 | Entropy loss: -0.0049  | Total Loss: 6.32 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2212/200000  | Episode Reward: 57.0  | Average Reward 52.44  | Actor loss: -0.28 | Critic loss: 9.87 | Entropy loss: -0.0117  | Total Loss: 9.58 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rew

Step: 250
Training  | Episode: 2221/200000  | Episode Reward: 63.0  | Average Reward 52.38  | Actor loss: -0.03 | Critic loss: 9.21 | Entropy loss: -0.0113  | Total Loss: 9.17 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2222/200000  | Episode Reward: 43.0  | Average Reward 52.37  | Actor loss: -0.27 | Critic loss: 5.39 | Entropy loss: -0.0108  | Total Loss: 5.11

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2231/200000  | Episode Reward: 57.0  | Average Reward 51.97  | Actor loss: 0.13 | Critic loss: 11.72 | Entropy loss: -0.0055  | Total Loss: 11.84 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2240/200000  | Episode Reward: 34.0  | Average Reward 52.10  | Actor loss: -0.27 | Critic loss: 6.57 | Entropy loss: -0.0138  | Total Loss: 6.28 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2250/200000  | Episode Reward: 72.0  | Average Reward 51.19  | Actor loss: 0.20 | Critic loss: 9.69 | Entropy loss: -0.0185  | Total Loss: 9.87 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2260/200000  | Episode Reward: 49.0  | Average Reward 50.84  | Actor loss: -0.01 | Critic loss: 5.05 | Entropy loss: -0.0122  | Total Loss: 5.03 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2269/200000  | Episode Reward: 44.0  | Average Reward 50.32  | Actor loss: 0.04 | Critic loss: 6.73 | Entropy loss: -0.0069  | Total Loss: 6.77 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2279/200000  | Episode Reward: 56.0  | Average Reward 50.49  | Actor loss: -0.05 | Critic loss: 6.60 | Entropy loss: -0.0103  | Total Loss: 6.54 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2288/200000  | Episode Reward: 21.0  | Average Reward 50.95  | Actor loss: -0.49 | Critic loss: 7.41 | Entropy loss: -0.0174  | Total Loss: 6.91 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2298/200000  | Episode Reward: 63.0  | Average Reward 50.38  | Actor loss: -0.05 | Critic loss: 6.95 | Entropy loss: -0.0094  | Total Loss: 6.88 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2307/200000  | Episode Reward: 73.0  | Average Reward 51.02  | Actor loss: 0.16 | Critic loss: 11.83 | Entropy loss: -0.0072  | Total Loss: 11.98 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2316/200000  | Episode Reward: 53.0  | Average Reward 51.79  | Actor loss: -0.07 | Critic loss: 8.18 | Entropy loss: -0.0061  | Total Loss: 8.10 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in termina

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2325/200000  | Episode Reward: 53.0  | Average Reward 52.35  | Actor loss: -0.12 | Critic loss: 7.68 | Entropy loss: -0.0073  | Total Loss: 7.55 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rewa

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2335/200000  | Episode Reward: 69.0  | Average Reward 52.60  | Actor loss: 0.07 | Critic loss: 10.62 | Entropy loss: -0.0100  | Total Loss: 10.68 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 1

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2344/200000  | Episode Reward: 79.0  | Average Reward 52.70  | Actor loss: 0.23 | Critic loss: 9.42 | Entropy loss: -0.0120  | Total Loss: 9.63 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2354/200000  | Episode Reward: 60.0  | Average Reward 52.46  | Actor loss: 0.05 | Critic loss: 6.50 | Entropy loss: -0.0099  | Total Loss: 6.54 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2363/200000  | Episode Reward: 65.0  | Average Reward 52.01  | Actor loss: 0.06 | Critic loss: 5.38 | Entropy loss: -0.0062  | Total Loss: 5.43 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2372/200000  | Episode Reward: 49.0  | Average Reward 52.17  | Actor loss: -0.10 | Critic loss: 7.25 | Entropy loss: -0.0085  | Total Loss: 7.14 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2382/200000  | Episode Reward: 46.0  | Average Reward 52.84  | Actor loss: -0.09 | Critic loss: 10.00 | Entropy loss: -0.0091  | Total Loss: 9.90 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in termin

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2391/200000  | Episode Reward: 23.0  | Average Reward 52.76  | Actor loss: -0.06 | Critic loss: 6.30 | Entropy loss: -0.0032  | Total Loss: 6.24 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2401/200000  | Episode Reward: 85.0  | Average Reward 52.65  | Actor loss: 0.09 | Critic loss: 11.39 | Entropy loss: -0.0088  | Total Loss: 11.47 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step re

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2410/200000  | Episode Reward: 56.0  | Average Reward 51.87  | Actor loss: -0.08 | Critic loss: 5.78 | Entropy loss: -0.0100  | Total Loss: 5.69 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2419/200000  | Episode Reward: 80.0  | Average Reward 52.22  | Actor loss: -0.16 | Critic loss: 16.13 | Entropy loss: -0.0134  | Total Loss: 15.96 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2429/200000  | Episode Reward: 60.0  | Average Reward 52.35  | Actor loss: -0.13 | Critic loss: 8.58 | Entropy loss: -0.0116  | Total Loss: 8.44 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2438/200000  | Episode Reward: 27.0  | Average Reward 52.05  | Actor loss: -0.74 | Critic loss: 9.91 | Entropy loss: -0.0125  | Total Loss: 9.16 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2447/200000  | Episode Reward: 40.0  | Average Reward 52.73  | Actor loss: -0.14 | Critic loss: 11.34 | Entropy loss: -0.0091  | Total Loss: 11.18 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2456/200000  | Episode Reward: 56.0  | Average Reward 53.42  | Actor loss: -0.18 | Critic loss: 4.61 | Entropy loss: -0.0086  | Total Loss: 4.42 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2466/200000  | Episode Reward: 63.0  | Average Reward 53.72  | Actor loss: 0.47 | Critic loss: 9.53 | Entropy loss: -0.0189  | Total Loss: 9.97 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal st

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2475/200000  | Episode Reward: 70.0  | Average Reward 53.92  | Actor loss: 0.09 | Critic loss: 16.01 | Entropy loss: -0.0139  | Total Loss: 16.08 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal ste

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2484/200000  | Episode Reward: 60.0  | Average Reward 53.86  | Actor loss: -0.03 | Critic loss: 5.61 | Entropy loss: -0.0062  | Total Loss: 5.58 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2493/200000  | Episode Reward: 44.0  | Average Reward 54.01  | Actor loss: -0.05 | Critic loss: 8.65 | Entropy loss: -0.0170  | Total Loss: 8.58 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2502/200000  | Episode Reward: 37.0  | Average Reward 54.20  | Actor loss: -0.25 | Critic loss: 8.20 | Entropy loss: -0.0089  | Total Loss: 7.94 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2511/200000  | Episode Reward: 49.0  | Average Reward 53.88  | Actor loss: -0.21 | Critic loss: 8.40 | Entropy loss: -0.0065  | Total Loss: 8.18 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2520/200000  | Episode Reward: 79.0  | Average Reward 53.56  | Actor loss: 0.14 | Critic loss: 11.88 | Entropy loss: -0.0131  | Total Loss: 12.01 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2530/200000  | Episode Reward: 37.0  | Average Reward 53.38  | Actor loss: -0.26 | Critic loss: 4.86 | Entropy loss: -0.0092  | Total Loss: 4.59 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal step

Training  | Episode: 2539/200000  | Episode Reward: 34.0  | Average Reward 53.37  | Actor loss: -0.25 | Critic loss: 8.62 | Entropy loss: -0.0059  | Total Loss: 8.37 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2540/200000  | Episode Reward: 39.0  | Average Reward 53.28  | Actor loss: -0.01 | Critic loss: 5.76 | Entropy loss: -0.0098  | Total Loss: 5.74 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step r

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2549/200000  | Episode Reward: 34.0  | Average Reward 54.13  | Actor loss: -0.15 | Critic loss: 7.25 | Entropy loss: -0.0132  | Total Loss: 7.09 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 1

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2558/200000  | Episode Reward: 50.0  | Average Reward 54.51  | Actor loss: -0.01 | Critic loss: 6.42 | Entropy loss: -0.0150  | Total Loss: 6.39 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 54.925. Model has been saved-----
Training  | Episode: 2567/200000  | Episode Reward: 102.0  | Average Reward 54.92  | Actor loss: -0.04 | Critic loss: 24.31 | Entropy loss: -0.0130  | Total Loss: 24.25 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal

Training  | Episode: 2576/200000  | Episode Reward: 36.0  | Average Reward 54.38  | Actor loss: -0.26 | Critic loss: 8.37 | Entropy loss: -0.0098  | Total Loss: 8.10 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2577/200000  | Episode Reward: 24.0  | Average Reward 54.10  | Actor loss: -0.19 | Critic loss: 8.32 | Entropy loss: -0.0076

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2586/200000  | Episode Reward: 86.0  | Average Reward 53.66  | Actor loss: 0.22 | Critic loss: 20.28 | Entropy loss: -0.0107  | Total Loss: 20.49 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2587/200000  | 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2596/200000  | Episode Reward: 92.0  | Average Reward 53.66  | Actor loss: 0.05 | Critic loss: 13.56 | Entropy loss: -0.0105  | Total Loss: 13.60 | Total Steps: 250
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2605/200000  | Episode Reward: 53.0  | Average Reward 53.54  | Actor loss: -0.06 | Critic loss: 6.74 | Entropy loss: -0.0065  | Total Loss: 6.67 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2614/200000  | Episode Reward: 53.0  | Average Reward 54.38  | Actor loss: -0.08 | Critic loss: 6.73 | Entropy loss: -0.0044  | Total Loss: 6.65 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2624/200000  | Episode Reward: 46.0  | Average Reward 53.54  | Actor loss: 0.00 | Critic loss: 5.71 | Entropy loss: -0.0043  | Total Loss: 5.71 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2633/200000  | Episode Reward: 60.0  | Average Reward 53.66  | Actor loss: -0.08 | Critic loss: 9.71 | Entropy loss: -0.0084  | Total Loss: 9.62 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal st

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2642/200000  | Episode Reward: 78.0  | Average Reward 54.35  | Actor loss: 0.16 | Critic loss: 10.25 | Entropy loss: -0.0143  | Total Loss: 10.40 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2651/200000  | Episode Reward: 37.0  | Average Reward 53.61  | Actor loss: -0.07 | Critic loss: 3.98 | Entropy loss: -0.0048  | Total Loss: 3.91 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2661/200000  | Episode Reward: 37.0  | Average Reward 53.31  | Actor loss: -0.14 | Critic loss: 5.27 | Entropy loss: -0.0070  | Total Loss: 5.13 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2670/200000  | Episode Reward: 43.0  | Average Reward 52.78  | Actor loss: -0.09 | Critic loss: 3.93 | Entropy loss: -0.0088  | Total Loss: 3.83 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2680/200000  | Episode Reward: 60.0  | Average Reward 53.13  | Actor loss: 0.05 | Critic loss: 11.65 | Entropy loss: -0.0081  | Total Loss: 11.69 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal st

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2690/200000  | Episode Reward: 46.0  | Average Reward 52.37  | Actor loss: -0.19 | Critic loss: 7.57 | Entropy loss: -0.0080  | Total Loss: 7.37 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in termina

Training  | Episode: 2699/200000  | Episode Reward: 34.0  | Average Reward 52.00  | Actor loss: -0.09 | Critic loss: 3.78 | Entropy loss: -0.0067  | Total Loss: 3.68 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2700/200000  | Episode Reward: 50.0  | Average Reward 52.06  | Actor los

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2709/200000  | Episode Reward: 53.0  | Average Reward 52.22  | Actor loss: 0.19 | Critic loss: 8.29 | Entropy loss: -0.0140  | Total Loss: 8.46 | Total Steps: 250
--- target colour: red, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2719/200000  | Episode Reward: 53.0  | Average Reward 51.94  | Actor loss: 0.26 | Critic loss: 11.66 | Entropy loss: -0.0127  | Total Loss: 11.91 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2728/200000  | Episode Reward: 46.0  | Average Reward 51.99  | Actor loss: -0.04 | Critic loss: 8.34 | Entropy loss: -0.0029  | Total Loss: 8.29 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2738/200000  | Episode Reward: 73.0  | Average Reward 52.24  | Actor loss: 0.02 | Critic loss: 10.63 | Entropy loss: -0.0081  | Total Loss: 10.64 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2748/200000  | Episode Reward: 53.0  | Average Reward 51.63  | Actor loss: 0.09 | Critic loss: 6.43 | Entropy loss: -0.0062  | Total Loss: 6.52 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2757/200000  | Episode Reward: 86.0  | Average Reward 52.27  | Actor loss: 0.49 | Critic loss: 17.34 | Entropy loss: -0.0096  | Total Loss: 17.82 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step rewar

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2766/200000  | Episode Reward: 46.0  | Average Reward 52.13  | Actor loss: -0.18 | Critic loss: 3.87 | Entropy loss: -0.0072  | Total Loss: 3.68 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2767/200000  | Episode Reward: 62.0  | Average Reward 51.93  | 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2776/200000  | Episode Reward: 21.0  | Average Reward 52.58  | Actor loss: -0.23 | Critic loss: 10.23 | Entropy loss: -0.0073  | Total Loss: 10.00 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step re

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2785/200000  | Episode Reward: 50.0  | Average Reward 53.43  | Actor loss: -0.07 | Critic loss: 5.91 | Entropy loss: -0.0030  | Total Loss: 5.84 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2794/200000  | Episode Reward: 66.0  | Average Reward 53.94  | Actor loss: -0.03 | Critic loss: 9.72 | Entropy loss: -0.0143  | Total Loss: 9.68 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2804/200000  | Episode Reward: 33.0  | Average Reward 54.34  | Actor loss: -0.13 | Critic loss: 6.29 | Entropy loss: -0.0081  | Total Loss: 6.15 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal step

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2813/200000  | Episode Reward: 56.0  | Average Reward 53.43  | Actor loss: -0.23 | Critic loss: 9.44 | Entropy loss: -0.0091  | Total Loss: 9.20 | Total Steps: 250
--- target colour: red, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  |

Step: 250
Training  | Episode: 2822/200000  | Episode Reward: 79.0  | Average Reward 53.84  | Actor loss: 0.21 | Critic loss: 8.86 | Entropy loss: -0.0132  | Total Loss: 9.06 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2823/200000  | Episode Reward: 50.0  | Average Reward 53.92  | 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2832/200000  | Episode Reward: 79.0  | Average Reward 54.69  | Actor loss: 0.44 | Critic loss: 18.92 | Entropy loss: -0.0109  | Total Loss: 19.35 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2842/200000  | Episode Reward: 73.0  | Average Reward 54.29  | Actor loss: 0.09 | Critic loss: 11.01 | Entropy loss: -0.0147  | Total Loss: 11.09 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2851/200000  | Episode Reward: 79.0  | Average Reward 54.77  | Actor loss: 0.33 | Critic loss: 9.10 | Entropy loss: -0.0108  | Total Loss: 9.41 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2860/200000  | Episode Reward: 53.0  | Average Reward 54.81  | Actor loss: 0.05 | Critic loss: 10.18 | Entropy loss: -0.0059  | Total Loss: 10.23 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 55.4. Model has been saved-----
Training  | Episode: 2869/200000  | Episode Reward: 63.0  | Average Reward 55.40  | Actor loss: 0.27 | Critic loss: 9.47 | Entropy loss: -0.0122  | Total Loss: 9.73 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3


Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2878/200000  | Episode Reward: 52.0  | Average Reward 55.30  | Actor loss: -0.11 | Critic loss: 3.33 | Entropy loss: -0.0076  | Total Loss: 3.22 | Total Steps: 250
--- target colour: black, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2887/200000  | Episode Reward: 53.0  | Average Reward 55.36  | Actor loss: -0.07 | Critic loss: 6.19 | Entropy loss: -0.0106  | Total Loss: 6.11 | Total Steps: 250
--- target colour: green, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2897/200000  | Episode Reward: 40.0  | Average Reward 55.12  | Actor loss: -0.15 | Critic loss: 5.38 | Entropy loss: -0.0100  | Total Loss: 5.22 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2906/200000  | Episode Reward: 49.0  | Average Reward 55.32  | Actor loss: -0.34 | Critic loss: 7.46 | Entropy loss: -0.0077  | Total Loss: 7.11 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 2907/200000  | Episode Reward: 56.0  | A

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2915/200000  | Episode Reward: 31.0  | Average Reward 55.81  | Actor loss: -0.39 | Critic loss: 13.64 | Entropy loss: -0.0095  | Total Loss: 13.25 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Step: 250
Training  | Episode: 2924/200000  | Episode Reward: 27.0  | Average Reward 55.51  | Actor loss: -0.52 | Critic loss: 5.44 | Entropy loss: -0.0142  | Total Loss: 4.91 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2925/200000  | Episode Reward: 45.0  | Average Reward 55.55  | Actor loss: -0.10 | Critic loss: 4.82 | Entropy loss: -0.0088  | Total Loss: 4.71 | Total Steps: 250
--

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2934/200000  | Episode Reward: 57.0  | Average Reward 55.87  | Actor loss: -0.16 | Critic loss: 11.26 | Entropy loss: -0.0098  | Total Loss: 11.09 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 56.65. Model has been saved-----
Training  | Episode: 2943/200000  | Episode Reward: 75.0  | Average Reward 56.65  | Actor loss: 0.24 | Critic loss: 7.50 | Entropy loss: -0.0139  | Total Loss: 7.72 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2952/200000  | Episode Reward: 77.0  | Average Reward 56.23  | Actor loss: -0.17 | Critic loss: 25.37 | Entropy loss: -0.0071  | Total Loss: 25.20 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2961/200000  | Episode Reward: 53.0  | Average Reward 56.16  | Actor loss: 0.03 | Critic loss: 5.90 | Entropy loss: -0.0081  | Total Loss: 5.92 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2970/200000  | Episode Reward: 89.0  | Average Reward 56.24  | Actor loss: -0.09 | Critic loss: 16.79 | Entropy loss: -0.0103  | Total Loss: 16.69 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step r

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2980/200000  | Episode Reward: 37.0  | Average Reward 55.78  | Actor loss: -0.34 | Critic loss: 6.93 | Entropy loss: -0.0091  | Total Loss: 6.58 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2989/200000  | Episode Reward: 59.0  | Average Reward 55.56  | Actor loss: 0.02 | Critic loss: 7.79 | Entropy loss: -0.0085  | Total Loss: 7.81 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 2990/200000  | E

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 2999/200000  | Episode Reward: 73.0  | Average Reward 55.20  | Actor loss: -0.02 | Critic loss: 14.24 | Entropy loss: -0.0052  | Total Loss: 14.22 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3008/200000  | Episode Reward: 69.0  | Average Reward 55.23  | Actor loss: 0.20 | Critic loss: 8.85 | Entropy loss: -0.0086  | Total Loss: 9.04 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3017/200000  | Episode Reward: 63.0  | Average Reward 55.23  | Actor loss: 0.03 | Critic loss: 6.23 | Entropy loss: -0.0077  | Total Loss: 6.25 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

Step: 250
Training  | Episode: 3026/200000  | Episode Reward: 76.0  | Average Reward 54.67  | Actor loss: 0.22 | Critic loss: 12.77 | Entropy loss: -0.0074  | Total Loss: 12.98 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3027/200000  | Episode Reward: 43.0  | Average Reward 54.76  | Actor loss: -0.11 | Critic loss: 3.56 | Entropy loss: -0.0043  | Total Loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3036/200000  | Episode Reward: 96.0  | Average Reward 54.35  | Actor loss: 0.27 | Critic loss: 21.45 | Entropy loss: -0.0115  | Total Loss: 21.71 | Total Ste

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3045/200000  | Episode Reward: 69.0  | Average Reward 54.72  | Actor loss: 0.15 | Critic loss: 6.15 | Entropy loss: -0.0122  | Total Loss: 6.28 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3054/200000  | Episode Reward: 53.0  | Average Reward 54.81  | Actor loss: 0.11 | Critic loss: 8.40 | Entropy loss: -0.0059  | Total Loss: 8.50 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in termina

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3064/200000  | Episode Reward: 63.0  | Average Reward 54.85  | Actor loss: 0.07 | Critic loss: 12.21 | Entropy loss: -0.0115  | Total Loss: 12.26 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step 

Step: 250
Training  | Episode: 3073/200000  | Episode Reward: 18.0  | Average Reward 54.51  | Actor loss: -0.37 | Critic loss: 7.42 | Entropy loss: -0.0097  | Total Loss: 7.04 | Total Steps: 250
--- target colour: blue, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3074/200000  | Episode Reward: 46.0  | Average Reward 54.45  | Actor loss: -0.12 | Critic loss: 4.62 | Entropy loss: -0.0095  | Total Loss: 4.49 | Total Steps: 250
-

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3083/200000  | Episode Reward: 47.0  | Average Reward 54.49  | Actor loss: -0.01 | Critic loss: 7.51 | Entropy loss: -0.0144  | Total Loss: 7.49 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward:

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3092/200000  | Episode Reward: 12.0  | Average Reward 54.56  | Actor loss: -0.49 | Critic loss: 8.13 | Entropy loss: -0.0111  | Total Loss: 7.63 | Total Steps: 250
--- target colour: red, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward:

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3102/200000  | Episode Reward: 50.0  | Average Reward 54.52  | Actor loss: -0.35 | Critic loss: 5.10 | Entropy loss: -0.0121  | Total Loss: 4.74 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3111/200000  | Episode Reward: 53.0  | Average Reward 54.65  | Actor loss: -0.03 | Critic loss: 5.47 | Entropy loss: -0.0105  | Total Loss: 5.44 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3120/200000  | Episode Reward: 56.0  | Average Reward 54.70  | Actor loss: -0.02 | Critic loss: 5.79 | Entropy loss: -0.0067  | Total Loss: 5.76 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3129/200000  | Episode Reward: 93.0  | Average Reward 55.17  | Actor loss: 0.44 | Critic loss: 21.14 | Entropy loss: -0.0134  | Total Loss: 21.57 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal ste

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3138/200000  | Episode Reward: 44.0  | Average Reward 55.40  | Actor loss: -0.16 | Critic loss: 8.98 | Entropy loss: -0.0043  | Total Loss: 8.82 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in termina

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3147/200000  | Episode Reward: 44.0  | Average Reward 55.48  | Actor loss: -0.22 | Critic loss: 10.88 | Entropy loss: -0.0076  | Total Loss: 10.65 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Training  | Episode: 3155/200000  | Episode Reward: 63.0  | Average Reward 55.48  | Actor loss: -0.05 | Critic loss: 5.67 | Entropy loss: -0.0081  | Total Loss: 5.61 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3156/200000  | Episode Reward: 63.0  | Average R

Training  | Episode: 3164/200000  | Episode Reward: 59.0  | Average Reward 55.58  | Actor loss: 0.14 | Critic loss: 6.16 | Entropy loss: -0.0110  | Total Loss: 6.29 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3165/200000  | Episode Reward: 37.0  | Average Reward 55.52  | Actor loss: -0.17 | Critic loss: 7.47 | E

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3174/200000  | Episode Reward: 91.0  | Average Reward 55.65  | Actor loss: 0.28 | Critic loss: 22.02 | Entropy loss: -0.0097  | Total Loss: 22.30 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward

Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3184/200000  | Episode Reward: 57.0  | Average Reward 56.27  | Actor loss: -0.02 | Critic loss: 11.56 | Entropy loss: -0.0067  | Total Loss: 11.54 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3193/200000  | Episode Reward: 47.0  | Average Reward 56.19  | Actor loss: -0.14 | Critic loss: 11.61 | Entropy loss: -0.0126  | Total Loss: 11.46 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in term

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 56.875. Model has been saved-----
Training  | Episode: 3202/200000  | Episode Reward: 56.0  | Average Reward 56.88  | Actor loss: -0.11 | Critic loss: 5.53 | Entropy loss: -0.0128  | Total Loss: 5.41 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal st

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
-----The best score for averaging previous 200 episode reward is 57.645. Model has been saved-----
Training  | Episode: 3211/200000  | Episode Reward: 76.0  | Average Reward 57.65  | Actor loss: -0.00 | Critic loss: 10.21 | Entropy loss: -0.0045  | Total Loss: 10.20 | Total Steps

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3219/200000  | Episode Reward: 40.0  | Average Reward 57.57  | Actor loss: -0.24 | Critic loss: 10.43 | Entropy loss: -0.0076  | Total Loss: 10.18 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3228/200000  | Episode Reward: 76.0  | Average Reward 57.67  | Actor loss: -0.02 | Critic loss: 25.69 | Entropy loss: -0.0115  | Total Loss: 25.66 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step re

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3237/200000  | Episode Reward: 59.0  | Average Reward 57.53  | Actor loss: -0.14 | Critic loss: 6.15 | Entropy loss: -0.0093  | Total Loss: 5.99 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3247/200000  | Episode Reward: 43.0  | Average Reward 57.16  | Actor loss: -0.07 | Critic loss: 5.80 | Entropy loss: -0.0025  | Total Loss: 5.73 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3256/200000  | Episode Reward: 40.0  | Average Reward 57.08  | Actor loss: -0.05 | Critic loss: 4.54 | Entropy loss: -0.0075  | Total Loss: 4.48 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3265/200000  | Episode Reward: 24.0  | Average Reward 57.23  | Actor loss: -0.41 | Critic loss: 9.46 | Entropy loss: -0.0113  | Total Loss: 9.04 | Total Steps: 250
--- target colour: blue, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 58.03. Model has been saved-----
Training  | Episode: 3274/200000  | Episode Reward: 79.0  | Average Reward 58.03  | Actor loss: 0.03 | Critic loss: 11.04 | Entropy loss: -0.0034  | Total Loss: 11.06 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3283/200000  | Episode Reward: 46.0  | Average Reward 58.27  | Actor loss: -0.09 | Critic loss: 3.87 | Entropy loss: -0.0050  | Total Loss: 3.78 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 58.69. Model has been saved-----
Training  | Episode: 3292/200000  | Episode Reward: 50.0  | Average Reward 58.69  | Actor loss: -0.14 | Critic loss: 8.47 | Entropy loss: -0.0056  | Total Loss: 8.32 | Total Steps: 250
--- target colour: red, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Ag

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
-----The best score for averaging previous 200 episode reward is 58.695. Model has been saved-----
Training  | Episode: 3301/200000  | Episode Reward: 83.0  | Average Reward 58.70  | Actor loss: 0.19 | Cr

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3309/200000  | Episode Reward: 49.0  | Average Reward 58.87  | Actor loss: -0.08 | Critic loss: 6.77 | Entropy loss: -0.0108  | Total Loss: 6.68 | Total Steps: 250
--- target colour: black, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3310/200000 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3318/200000  | Episode Reward: 82.0  | Average Reward 59.02  | Actor loss: 0.21 | Critic loss: 12.47 | Entropy loss: -0.0075  | Total Loss: 12.67 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3327/200000  | Episode Reward: 33.0  | Average Reward 59.12  | Actor loss: -0.27 | Critic loss: 5.23 | Entropy loss: -0.0083  | Total Loss: 4.95 | Total Steps: 250
--- target colour: green, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3337/200000  | Episode Reward: 29.0  | Average Reward 57.96  | Actor loss: -0.19 | Critic loss: 4.16 | Entropy loss: -0.0073  | Total Loss: 3.96 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3346/200000  | Episode Reward: 31.0  | Average Reward 57.67  | Actor loss: -0.24 | Critic loss: 10.52 | Entropy loss: -0.0067  | Total Loss: 10.28 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3355/200000  | Episode Reward: 73.0  | Average Reward 57.55  | Actor loss: -0.03 | Critic loss: 16.98 | Entropy loss: -0.0061  | Total Loss: 16.94 | Total Steps: 250
--- target colour: green, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step 

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3364/200000  | Episode Reward: 61.0  | Average Reward 58.05  | Actor loss: -0.03 | Critic loss: 14.64 | Entropy loss: -0.0138  | Total Loss: 14.60 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3373/200000  | Episode Reward: 86.0  | Average Reward 58.17  | Actor loss: 0.07 | Critic loss: 26.63 | Entropy loss: -0.0058  | Total Loss: 26.69 | Total Steps: 250
--- target colour: green, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3382/200000  | Episode Reward: 56.0  | Average Reward 58.52  | Actor loss: 0.05 | Critic loss: 9.76 | Entropy loss: -0.0084  | Total Loss: 9.80 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal step

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3391/200000  | Episode Reward: 69.0  | Average Reward 58.61  | Actor loss: 0.08 | Critic loss: 6.42 | Entropy loss: -0.0080  | Total Loss: 6.49 | Total Steps: 250
--- target colour: blue, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3392/200000  | E

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3401/200000  | Episode Reward: 63.0  | Average Reward 57.55  | Actor loss: -0.25 | Critic loss: 10.06 | Entropy loss: -0.0107  | Total Loss: 9.80 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3410/200000  | Episode Reward: 76.0  | Average Reward 57.55  | Actor loss: 0.19 | Critic loss: 12.75 | Entropy loss: -0.0134  | Total Loss: 12.93 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewa

Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3419/200000  | Episode Reward: 37.0  | Average Reward 57.19  | Actor loss: -0.11 | Critic loss: 5.70 | Entropy loss: -0.0067  | Total Loss: 5.57 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3420/200000

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3429/200000  | Episode Reward: 69.0  | Average Reward 57.34  | Actor loss: 0.09 | Critic loss: 11.70 | Entropy loss: -0.0103  | Total Loss: 11.79 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rew

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3438/200000  | Episode Reward: 60.0  | Average Reward 57.16  | Actor loss: -0.03 | Critic loss: 6.43 | Entropy loss: -0.0094  | Total Loss: 6.39 | Total Steps: 250
--- target colour: black, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3439/200000  | Episode Reward: 46.0 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3447/200000  | Episode Reward: 86.0  | Average Reward 57.81  | Actor loss: 0.41 | Critic loss: 15.92 | Entropy loss: -0.0072  | Total Loss: 16.32 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3456/200000  | Episode Reward: 68.0  | Average Reward 57.83  | Actor loss: 0.02 | Critic loss: 3.04 | Entropy loss: -0.0031  | Total Loss: 3.05 | Total Steps: 250
--- target colour: red, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3466/200000  | Episode Reward: 53.0  | Average Reward 57.66  | Actor loss: -0.16 | Critic loss: 4.29 | Entropy loss: -0.0094  | Total Loss: 4.12 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step re

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3475/200000  | Episode Reward: 81.0  | Average Reward 57.15  | Actor loss: -0.05 | Critic loss: 13.34 | Entropy loss: -0.0074  | Total Loss: 13.28 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal s

Training  | Episode: 3484/200000  | Episode Reward: 46.0  | Average Reward 57.27  | Actor loss: -0.11 | Critic loss: 4.21 | Entropy loss: -0.0079  | Total Loss: 4.09 | Total Steps: 250
--- target colour: yellow, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3485/200000  | Episode Reward: 50.0  | Average Reward 57.24  | Actor loss: 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3494/200000  | Episode Reward: 53.0  | Average Reward 57.56  | Actor loss: -0.00 | Critic loss: 5.26 | Entropy loss: -0.0099  | Total Loss: 5.24 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3503/200000  | Episode Reward: 66.0  | Average Reward 57.23  | Actor loss: -0.12 | Critic loss: 6.45 | Entropy loss: -0.0049  | Total Loss: 6.32 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3512/200000  | Episode Reward: 80.0  | Average Reward 56.98  | Actor loss: 0.20 | Critic loss: 13.52 | Entropy loss: -0.0073  | Total Loss: 13.71 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step re

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3522/200000  | Episode Reward: 43.0  | Average Reward 56.28  | Actor loss: -0.29 | Critic loss: 9.44 | Entropy loss: -0.0105  | Total Loss: 9.14 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3532/200000  | Episode Reward: 34.0  | Average Reward 56.28  | Actor loss: 0.03 | Critic loss: 9.85 | Entropy loss: -0.0078  | Total Loss: 9.87 | Total Steps: 250
--- target colour: blue, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3541/200000  | Episode Reward: 50.0  | Average Reward 56.17  | Actor loss: -0.04 | Critic loss: 13.21 | Entropy loss: -0.0034  | Total Loss: 13.16 | Total Steps: 250
--- target colour: red, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal s

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3550/200000  | Episode Reward: 66.0  | Average Reward 56.16  | Actor loss: 0.17 | Critic loss: 8.53 | Entropy loss: -0.0066  | Total Loss: 8.69 | Total Steps: 250
--- target colour: blue, target object: prism ---
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3559/200000  | Episode Reward: 33.0  | Average Reward 56.04  | Actor loss: -0.07 | Critic loss: 6.92 | Entropy loss: -0.0031  | Total Loss: 6.84 | Total Steps: 250
--- target colour: red, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3569/200000  | Episode Reward: 93.0  | Average Reward 55.88  | Actor loss: -0.01 | Critic loss: 15.73 | Entropy loss: -0.0042  | Total Loss: 15.71 | Total Steps: 250
--- target colour

Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3578/200000  | Episode Reward: 85.0  | Average Reward 55.83  | Actor loss: 0.11 | Critic loss: 14.95 | Entropy loss: -0.0026  | Total Loss: 15.06 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step 

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3587/200000  | Episode Reward: 96.0  | Average Reward 55.53  | Actor loss: 0.33 | Critic loss: 19.62 | Entropy loss: -0.0072  | Total Loss: 19.94 | Total Steps: 250
--- target colour: yellow, target object: capsule ---
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in termin

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3596/200000  | Episode Reward: 24.0  | Average Reward 55.42  | Actor loss: -0.15 | Critic loss: 10.84 | Entropy loss: -0.0042  | Total Loss: 10.69 | Total Steps: 250
--- target colour: yellow, target object: sphere ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3597/200000 

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3606/200000  | Episode Reward: 63.0  | Average Reward 55.65  | Actor loss: -0.15 | Critic loss: 7.13 | Entropy loss: -0.0061  | Total Loss: 6.97 | Total Steps: 250
--- target colour: black, target object: prism ---
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3615/200000  | Episode Reward: 46.0  | Average Reward 55.29  | Actor loss: -0.06 | Critic loss: 4.36 | Entropy loss: -0.0063  | Total Loss: 4.29 | Total Steps: 250
--- target colour: black, target object: capsule ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step rewar

Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Step: 250
Training  | Episode: 3624/200000  | Episode Reward: 14.0  | Average Reward 55.22  | Actor loss: -0.44 | Critic loss: 10.17 | Entropy loss: -0.0100  | Total Loss: 9.72 | Total Steps: 250
--- target colour: black, target object: cube ---
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step rewar

Agent in terminal steps
Terminal Step reward: 10.0
Step: 250
Training  | Episode: 3633/200000  | Episode Reward: 27.0  | Average Reward 54.76  | Actor loss: -0.17 | Critic loss: 5.31 | Entropy loss: -0.0073  | Total Loss: 5.14 | Total Steps: 250
--- target colour: black, target object: prism ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3634/200000  | Episode Reward: 39.0  | Average Reward 54.73  | Actor loss: -0.05 | Critic loss: 3.81 | Entropy loss: -0.0034  | Total Loss: 3.76 | Total Steps: 250
--- target colour: black, t

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3643/200000  | Episode Reward: 63.0  | Average Reward 54.61  | Actor loss: 0.02 | Critic loss: 7.69 | Entropy loss: -0.0058  | Total Loss: 7.71 | Total Steps: 250
--- target colour: green, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3652/200000  | Episode Reward: 82.0  | Average Reward 54.13  | Actor loss: 0.33 | Critic loss: 22.33 | Entropy loss: -0.0066  | Total Loss: 22.65 | Total Steps: 250
--- target colour: yellow, target object: cylinder ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Trai

Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Step: 250
Training  | Episode: 3662/200000  | Episode Reward: 43.0  | Average Reward 53.88  | Actor loss: -0.32 | Critic loss: 5.48 | Entropy loss: -0.0126  | Total Loss: 5.15 | Total Steps: 250
--- target colour: green, target object: sphere ---
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward: 10.0
Decision Step reward: -3
Decision Step reward: -3
Agent in terminal steps
Terminal Step reward:

In [None]:
# # run this when you stop training prematurely
# data = {
#             'all_average_reward': all_average_reward,
#             'all_episode_reward': all_episode_reward,
#             'all_actor_loss': all_actor_loss,
#             'all_critic_loss': all_critic_loss,
#             'all_entropy_loss': all_entropy_loss,
#             'all_total_loss': all_total_loss,
#             'all_steps': all_steps,
#         } 
# file_path = f'result/{ALG_NAME}_{ENV_ID}.txt'
# with open(file_path, 'w') as file:
#     json.dump(data, file)

# Test

In [None]:
from torch.distributions import Categorical

device = torch.device("cpu")

episode = 9190
speed = 1
MAX_STEPS = 500
TEST_EPISODES = 100
ALG_NAME = 'S0_with'
ENV_ID = '3'
tracked_agent = -1

env.reset()
agent = Agent(num_words, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
agent.load(episode,ALG_NAME,ENV_ID)
average = 0
            
for episode in range(TEST_EPISODES):
            episode_reward = 0
            # env.reset()
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            STEPS = 0
            decision_steps, terminal_steps = env.get_steps(behavior_name)

            # state -- vt, lt, lstm        
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device) #vector image
            object_index = int(decision_steps.obs[1][0][0])
            colour_index = int(decision_steps.obs[1][0][1])
            print(f'--- target colour: {colour_hashmap[colour_index]}, target object: {object_hashmap[object_index]} ---')
            # objects: 0-capsule,1-cube,2-cylinder,3-prism,4-sphere 
            # colours: 0-red,1-green,2-blue,3-yellow,4-black 
            
#             lt = torch.eye(num_words)[:, index].to(device) #one hot encoder language vector
            lt = torch.zeros(35).to(device)
            lt[object_index],lt[colour_index+5] = 1,1 #one hot encoder language vector
        
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            done = False
            while not done:
                STEPS += 1                
                lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                # value = value.detach()
                dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
                

                action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
                # action_dist = Categorical(F.softmax(policy_dist,dim=1))
                action = action_dist.sample() # sample an action from action_dist
                action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()

                discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                action_tuple = ActionTuple()
                action_tuple.add_discrete(discrete_actions)
                env.set_actions(behavior_name,action_tuple)
                env.step()
                decision_steps, terminal_steps = env.get_steps(behavior_name)

                if tracked_agent == -1 and len(decision_steps) >= 1:
                    tracked_agent = decision_steps.agent_id[0]
                    # print(tracked_agent)

                if tracked_agent in terminal_steps: # roll over or hit the target
                    print('Agent in terminal steps')
                    done = True
                    reward = terminal_steps[tracked_agent].reward
                    if reward > 0:
                        pass
                    else: reward = -1 # roll over or other unseen conditions

                    print(f'Terminal Step reward: {reward}')

                elif tracked_agent in decision_steps: # the agent which requires action
                    reward = decision_steps[tracked_agent].reward
                    # print(f'Decision Step reward: {reward}')
                    # if reward<0:
                    #     print(f'Decision Step reward: {reward}')

                if STEPS >= MAX_STEPS:
                        reward = -10
                        print(f'Max Step Reward: {reward}')
                        env.reset()
                        done = True

                episode_reward = episode_reward + reward
                vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                vt = vt_new
            average += episode_reward / TEST_EPISODES
            print(f'Episode: {episode}, Episode reward: {episode_reward}')
print(f'Average Episode Reward: {average}')


# Random Action

In [None]:
# Try to find the optimized MAX_STEP
# speed 1: 10000+ speed 2:1822 speed 3: 1918 speed 4: 900 speed 5: 951 speed 6:964 speed 7: 1181 
import random
import torch
import torch.nn.functional as F
# speed = 1
TRAIN_EPISODES = 20
tracked_agent = -1
num_actions = 4
average = 0
# for speed in range(30,70,5):
#     speed = speed/10
speed = 3
for episode in range(TRAIN_EPISODES):
    env.reset()
    behavior_name=list(env.behavior_specs)[0]
    step = 0
    while True:
        index = random.randint(0, 3) # sample an action from action_dist
        action_onehot = F.one_hot(torch.tensor(index),num_actions).cpu()
        discrete_actions = np.array(action_onehot).reshape(1,4)*speed #[forward, backward, right, left]
        action_tuple = ActionTuple()
        action_tuple.add_discrete(discrete_actions)
        env.set_actions(behavior_name,action_tuple)
        env.step()
        step += 1

        decision_steps, terminal_steps = env.get_steps(behavior_name)
        if tracked_agent == -1 and len(decision_steps) >= 1:
            tracked_agent = decision_steps.agent_id[0]
            
        if tracked_agent in terminal_steps: # roll over or hit the target
            reward = terminal_steps[tracked_agent].reward
            if reward > 0: # hit the target
                print(f'{episode}: {step} in total')
                average += step
                break
            else:           # roll over or other conditions
                env.reset()
                step = 0
                continue # roll over or other unseen conditions
        if tracked_agent in decision_steps: # the agent which requires action
            continue
average /= TRAIN_EPISODES
print(f'For speed {speed}, average random step for hitting the target is {average}')
      

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Sample data
object_sizes = [1, 2, 3, 4, 5]  # Object sizes on the x-axis
max_steps = [[4721, 2468, 1587, 1249, 1084],   # Max steps for each object size
             [4027, 1550, 738, 640, 505],
             [3320, 1391, 464, 331, 307],
             [3289, 882, 679, 424, 211],
             [3057, 1203, 470, 378, 222]]
speeds = [1, 2, 3, 4, 5]  # Discrete speeds for color-coding

# Color mapping for each speed
speed_color_mapping = {
    1: 'red',
    2: 'blue',
    3: 'green',
    4: 'orange',
    5: 'purple'
}

# Generate scatter plot
for i, size in enumerate(object_sizes):
    for j, steps in enumerate(max_steps[i]):
        speed = speeds[j]
        color = speed_color_mapping[speed]
        plt.scatter(size, steps, c=color)

# Set labels and title
plt.xlabel('Object Size')
plt.ylabel('Max Step')
plt.title('Scatter Plot')

plt.xlim(0, 6)
plt.xticks(np.arange(0, 7, 1))
plt.ylim(0, 5000)
plt.yticks(np.arange(0, 5001, 200))

# Create legend
legend_labels = [f'Speed {speed}' for speed, _ in speed_color_mapping.items()]
plt.legend(legend_labels, loc='upper right')

# Display the plot
plt.show()


